diff --git a/.clang-format b/.clang-format index fe1aa1a30d4026..86c20ee744dec1 100644 --- a/.clang-format +++ b/.clang-format @@ -92,6 +92,7 @@ ForEachMacros: - '__rq_for_each_bio' - '__shost_for_each_device' - '__sym_for_each' + - '_for_each_counter' - 'apei_estatus_for_each_section' - 'ata_for_each_dev' - 'ata_for_each_link' @@ -141,11 +142,14 @@ ForEachMacros: - 'damon_for_each_target_safe' - 'damos_for_each_filter' - 'damos_for_each_filter_safe' + - 'damos_for_each_ops_filter' + - 'damos_for_each_ops_filter_safe' - 'damos_for_each_quota_goal' - 'damos_for_each_quota_goal_safe' - 'data__for_each_file' - 'data__for_each_file_new' - 'data__for_each_file_start' + - 'def_for_each_cpu' - 'device_for_each_child_node' - 'device_for_each_child_node_scoped' - 'dma_fence_array_for_each' @@ -176,6 +180,7 @@ ForEachMacros: - 'drm_for_each_privobj' - 'drm_gem_for_each_gpuvm_bo' - 'drm_gem_for_each_gpuvm_bo_safe' + - 'drm_gpusvm_for_each_range' - 'drm_gpuva_for_each_op' - 'drm_gpuva_for_each_op_from_reverse' - 'drm_gpuva_for_each_op_reverse' @@ -216,8 +221,10 @@ ForEachMacros: - 'for_each_active_dev_scope' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' + - 'for_each_active_irq' - 'for_each_active_route' - 'for_each_aggr_pgid' + - 'for_each_alloc_capable_rdt_resource' - 'for_each_and_bit' - 'for_each_andnot_bit' - 'for_each_available_child_of_node' @@ -228,6 +235,7 @@ ForEachMacros: - 'for_each_btf_ext_rec' - 'for_each_btf_ext_sec' - 'for_each_bvec' + - 'for_each_capable_rdt_resource' - 'for_each_card_auxs' - 'for_each_card_auxs_safe' - 'for_each_card_components' @@ -241,6 +249,7 @@ ForEachMacros: - 'for_each_cgroup_storage_type' - 'for_each_child_of_node' - 'for_each_child_of_node_scoped' + - 'for_each_child_of_node_with_prefix' - 'for_each_clear_bit' - 'for_each_clear_bit_from' - 'for_each_clear_bitrange' @@ -296,6 +305,7 @@ ForEachMacros: - 'for_each_group_member_head' - 'for_each_hstate' - 'for_each_hwgpio' + - 'for_each_hwgpio_in_range' - 'for_each_if' - 
'for_each_inject_fn' - 'for_each_insn' @@ -304,6 +314,7 @@ ForEachMacros: - 'for_each_intid' - 'for_each_iommu' - 'for_each_ip_tunnel_rcu' + - 'for_each_irq_desc' - 'for_each_irq_nr' - 'for_each_lang' - 'for_each_link_ch_maps' @@ -324,6 +335,8 @@ ForEachMacros: - 'for_each_missing_reg' - 'for_each_mle_subelement' - 'for_each_mod_mem_type' + - 'for_each_mon_capable_rdt_resource' + - 'for_each_mp_bvec' - 'for_each_net' - 'for_each_net_continue_reverse' - 'for_each_net_rcu' @@ -351,6 +364,7 @@ ForEachMacros: - 'for_each_node_by_name' - 'for_each_node_by_type' - 'for_each_node_mask' + - 'for_each_node_numadist' - 'for_each_node_state' - 'for_each_node_with_cpus' - 'for_each_node_with_property' @@ -359,6 +373,8 @@ ForEachMacros: - 'for_each_of_allnodes' - 'for_each_of_allnodes_from' - 'for_each_of_cpu_node' + - 'for_each_of_graph_port' + - 'for_each_of_graph_port_endpoint' - 'for_each_of_pci_range' - 'for_each_old_connector_in_state' - 'for_each_old_crtc_in_state' @@ -372,9 +388,11 @@ ForEachMacros: - 'for_each_oldnew_plane_in_state_reverse' - 'for_each_oldnew_private_obj_in_state' - 'for_each_online_cpu' + - 'for_each_online_cpu_wrap' - 'for_each_online_node' - 'for_each_online_pgdat' - 'for_each_or_bit' + - 'for_each_page_ext' - 'for_each_path' - 'for_each_pci_bridge' - 'for_each_pci_dev' @@ -382,8 +400,10 @@ ForEachMacros: - 'for_each_physmem_range' - 'for_each_populated_zone' - 'for_each_possible_cpu' + - 'for_each_possible_cpu_wrap' - 'for_each_present_blessed_reg' - 'for_each_present_cpu' + - 'for_each_present_section_nr' - 'for_each_prime_number' - 'for_each_prime_number_from' - 'for_each_probe_cache_entry' @@ -396,6 +416,7 @@ ForEachMacros: - 'for_each_prop_dlc_cpus' - 'for_each_prop_dlc_platforms' - 'for_each_property_of_node' + - 'for_each_rdt_resource' - 'for_each_reg' - 'for_each_reg_filtered' - 'for_each_reloc' @@ -434,10 +455,10 @@ ForEachMacros: - 'for_each_subelement_id' - 'for_each_sublist' - 'for_each_subsystem' + - 'for_each_suite' - 
'for_each_supported_activate_fn' - 'for_each_supported_inject_fn' - 'for_each_sym' - - 'for_each_test' - 'for_each_thread' - 'for_each_token' - 'for_each_unicast_dest_pgid' @@ -499,8 +520,10 @@ ForEachMacros: - 'idr_for_each_entry_continue' - 'idr_for_each_entry_continue_ul' - 'idr_for_each_entry_ul' + - 'iio_for_each_active_channel' - 'in_dev_for_each_ifa_rcu' - 'in_dev_for_each_ifa_rtnl' + - 'in_dev_for_each_ifa_rtnl_net' - 'inet_bind_bucket_for_each' - 'interval_tree_for_each_span' - 'intlist__for_each_entry' @@ -542,7 +565,6 @@ ForEachMacros: - 'list_for_each_prev' - 'list_for_each_prev_safe' - 'list_for_each_rcu' - - 'list_for_each_reverse' - 'list_for_each_safe' - 'llist_for_each' - 'llist_for_each_entry' @@ -552,6 +574,7 @@ ForEachMacros: - 'map__for_each_symbol' - 'map__for_each_symbol_by_name' - 'mas_for_each' + - 'mas_for_each_rev' - 'mci_for_each_dimm' - 'media_device_for_each_entity' - 'media_device_for_each_intf' @@ -561,10 +584,15 @@ ForEachMacros: - 'media_pipeline_for_each_entity' - 'media_pipeline_for_each_pad' - 'mlx5_lag_for_each_peer_mdev' + - 'mptcp_for_each_subflow' - 'msi_domain_for_each_desc' - 'msi_for_each_desc' - 'mt_for_each' + - 'nanddev_io_for_each_block' - 'nanddev_io_for_each_page' + - 'neigh_for_each_in_bucket' + - 'neigh_for_each_in_bucket_rcu' + - 'neigh_for_each_in_bucket_safe' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' - 'netdev_for_each_lower_private_rcu' @@ -604,11 +632,11 @@ ForEachMacros: - 'perf_evlist__for_each_entry_safe' - 'perf_evlist__for_each_evsel' - 'perf_evlist__for_each_mmap' + - 'perf_evsel_for_each_per_thread_period_safe' - 'perf_hpp_list__for_each_format' - 'perf_hpp_list__for_each_format_safe' - 'perf_hpp_list__for_each_sort_list' - 'perf_hpp_list__for_each_sort_list_safe' - - 'perf_tool_event__for_each_event' - 'plist_for_each' - 'plist_for_each_continue' - 'plist_for_each_entry' @@ -627,7 +655,6 @@ ForEachMacros: - 'rdma_for_each_block' - 'rdma_for_each_port' - 
'rdma_umem_for_each_dma_block' - - 'resort_rb__for_each_entry' - 'resource_list_for_each_entry' - 'resource_list_for_each_entry_safe' - 'rhl_for_each_entry_rcu' @@ -658,6 +685,7 @@ ForEachMacros: - 'shost_for_each_device' - 'sk_for_each' - 'sk_for_each_bound' + - 'sk_for_each_bound_safe' - 'sk_for_each_entry_offset_rcu' - 'sk_for_each_from' - 'sk_for_each_rcu' @@ -680,7 +708,11 @@ ForEachMacros: - 'tb_property_for_each' - 'tcf_act_for_each_action' - 'tcf_exts_for_each_action' + - 'test_suite__for_each_test_case' + - 'tool_pmu__for_each_event' + - 'ttm_bo_lru_for_each_reserved_guarded' - 'ttm_resource_manager_for_each_res' + - 'udp_lrpa_for_each_entry_rcu' - 'udp_portaddr_for_each_entry' - 'udp_portaddr_for_each_entry_rcu' - 'usb_hub_for_each_child' @@ -691,6 +723,7 @@ ForEachMacros: - 'v4l2_m2m_for_each_src_buf_safe' - 'virtio_device_for_each_vq' - 'while_for_each_ftrace_op' + - 'workloads__for_each' - 'xa_for_each' - 'xa_for_each_marked' - 'xa_for_each_range' diff --git a/.clippy.toml b/.clippy.toml index 815c94732ed785..137f41d203de37 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -7,5 +7,5 @@ check-private-items = true disallowed-macros = [ # The `clippy::dbg_macro` lint only works with `std::dbg!`, thus we simulate # it here, see: https://github.com/rust-lang/rust-clippy/issues/11303. 
- { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool" }, + { path = "kernel::dbg", reason = "the `dbg!` macro is intended as a debugging tool", allow-invalid = true }, ] diff --git a/.gitignore b/.gitignore index f2f63e47fb8868..b83a68185ef469 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ # .* *.a +*.a_thinlto_native *.asn1.[ch] *.bin *.bz2 @@ -39,6 +40,7 @@ *.mod.c *.o *.o.* +*.o_thinlto_native *.patch *.rmeta *.rpm @@ -64,6 +66,7 @@ modules.order /vmlinux /vmlinux.32 /vmlinux.map +/vmlinux.thinlink /vmlinux.symvers /vmlinux.unstripped /vmlinux-gdb.py diff --git a/.mailmap b/.mailmap index 4f7cd8e231778c..a885e2eefc6979 100644 --- a/.mailmap +++ b/.mailmap @@ -102,6 +102,7 @@ Ard Biesheuvel Arnaud Patard Arnd Bergmann Arun Kumar Neelakantam +Asahi Lina Ashok Raj Nagarajan Ashwin Chaugule Asutosh Das @@ -312,6 +313,7 @@ Jan Glauber Jan Kuliga Jarkko Sakkinen Jarkko Sakkinen +Jarkko Sakkinen Jason Gunthorpe Jason Gunthorpe Jason Gunthorpe @@ -322,6 +324,7 @@ Jayachandran C Jayachandran C Jayachandran C +Jean-Michel Hautbois Jean Tourrilhes Jeevan Shriram Jeff Garzik @@ -438,12 +441,16 @@ Linus Lüssing Li Yang Li Yang Lior David +Loic Poulain +Loic Poulain Lorenzo Pieralisi Lorenzo Stoakes Luca Ceresoli Luca Weiss Lukasz Luba Luo Jie +Lance Yang +Lance Yang Maciej W. Rozycki Maciej W. 
Rozycki Maharaja Kennadyrajan @@ -480,6 +487,7 @@ Matthias Fuchs Matthieu Baerts Matthieu CASTET Matti Vaittinen +Mattijs Korpershoek Matt Ranostay Matt Ranostay Matt Ranostay Matthew Ranostay @@ -685,6 +693,8 @@ Simon Wunderlich Simon Wunderlich Simon Wunderlich Sricharan Ramabadhran +Srinivas Kandagatla +Srinivas Kandagatla Srinivas Ramana Sriram R Sriram Yagnaraman @@ -744,6 +754,7 @@ Tvrtko Ursulin Tycho Andersen Tzung-Bi Shih Uwe Kleine-König +Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König diff --git a/CREDITS b/CREDITS index 1b77fba6c27ecb..f74d230992d6cb 100644 --- a/CREDITS +++ b/CREDITS @@ -2071,6 +2071,10 @@ S: 660 Harvard Ave. #7 S: Santa Clara, CA 95051 S: USA +N: Joonsoo Kim +E: iamjoonsoo.kim@lge.com +D: Slab allocators + N: Kukjin Kim E: kgene@kernel.org D: Samsung S3C, S5P and Exynos ARM architectures diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 3879963f0f01e5..11545c9e2e93f2 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -77,7 +77,7 @@ Description: What: /sys/block//diskseq Date: February 2021 -Contact: Matteo Croce +Contact: Matteo Croce Description: The /sys/block//diskseq files reports the disk sequence number, which is a monotonically increasing diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 206079d3bd5b12..6a1acabb29d85f 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -511,6 +511,7 @@ Description: information about CPUs heterogeneity. 
What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/gather_data_sampling + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection /sys/devices/system/cpu/vulnerabilities/itlb_multihit /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds diff --git a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd index 2a19584d091e48..8c9718d83e9d71 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd +++ b/Documentation/ABI/testing/sysfs-driver-hid-appletb-kbd @@ -1,6 +1,6 @@ What: /sys/bus/hid/drivers/hid-appletb-kbd//mode -Date: September, 2023 -KernelVersion: 6.5 +Date: March, 2025 +KernelVersion: 6.15 Contact: linux-input@vger.kernel.org Description: The set of keys displayed on the Touch Bar. diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314dfde..6fbab98fb639df 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,8 +119,32 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan1_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 1 speed in RPM. 
+ + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan2_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 2 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. + +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/fan3_input +Date: March 2025 +KernelVersion: 6.14 +Contact: intel-xe@lists.freedesktop.org +Description: RO. Fan 3 speed in RPM. + + Only supported for particular Intel Xe graphics platforms. diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index ae0191295d29b8..e36d2de16cbdad 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -1604,3 +1604,35 @@ Description: prevent the UFS from frequently performing clock gating/ungating. The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_count +What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_count +Date: March 2025 +Contact: Bao D. Nguyen +Description: + This attribute is applicable to ufs devices compliant to the + JEDEC specifications version 4.1 or later. The + device_lvl_exception_count is a counter indicating the number of + times the device level exceptions have occurred since the last + time this variable is reset. Writing a 0 value to this + attribute will reset the device_lvl_exception_count. If the + device_lvl_exception_count reads a positive value, the user + application should read the device_lvl_exception_id attribute to + know more information about the exception. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/device_lvl_exception_id +What: /sys/bus/platform/devices/*.ufs/device_lvl_exception_id +Date: March 2025 +Contact: Bao D. 
Nguyen +Description: + Reading the device_lvl_exception_id returns the + qDeviceLevelExceptionID attribute of the ufs device JEDEC + specification version 4.1. The definition of the + qDeviceLevelExceptionID is the ufs device vendor specific + implementation. Refer to the device manufacturer datasheet for + more information on the meaning of the qDeviceLevelExceptionID + attribute value. + + The attribute is read only. diff --git a/Documentation/ABI/testing/sysfs-kernel-reboot b/Documentation/ABI/testing/sysfs-kernel-reboot index e117aba46be0e8..52571fd5ddba51 100644 --- a/Documentation/ABI/testing/sysfs-kernel-reboot +++ b/Documentation/ABI/testing/sysfs-kernel-reboot @@ -1,7 +1,7 @@ What: /sys/kernel/reboot Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Interface to set the kernel reboot behavior, similarly to what can be done via the reboot= cmdline option. (see Documentation/admin-guide/kernel-parameters.txt) @@ -9,25 +9,25 @@ Description: Interface to set the kernel reboot behavior, similarly to What: /sys/kernel/reboot/mode Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Reboot mode. Valid values are: cold warm hard soft gpio What: /sys/kernel/reboot/type Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Reboot type. Valid values are: bios acpi kbd triple efi pci What: /sys/kernel/reboot/cpu Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: CPU number to use to reboot. What: /sys/kernel/reboot/force Date: November 2020 KernelVersion: 5.11 -Contact: Matteo Croce +Contact: Matteo Croce Description: Don't wait for any other CPUs on reboot and avoid anything that could hang. 
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ff0b440ef2dc90..ce296b8430fc98 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -22,3 +22,5 @@ are configurable at compile, boot or run time. srso gather_data_sampling reg-file-data-sampling + rsb + indirect-target-selection diff --git a/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst new file mode 100644 index 00000000000000..d9ca64108d2332 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/indirect-target-selection.rst @@ -0,0 +1,168 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Indirect Target Selection (ITS) +=============================== + +ITS is a vulnerability in some Intel CPUs that support Enhanced IBRS and were +released before Alder Lake. ITS may allow an attacker to control the prediction +of indirect branches and RETs located in the lower half of a cacheline. + +ITS is assigned CVE-2024-28956 with a CVSS score of 4.7 (Medium). + +Scope of Impact +--------------- +- **eIBRS Guest/Host Isolation**: Indirect branches in KVM/kernel may still be + predicted with unintended target corresponding to a branch in the guest. + +- **Intra-Mode BTI**: In-kernel training such as through cBPF or other native + gadgets. + +- **Indirect Branch Prediction Barrier (IBPB)**: After an IBPB, indirect + branches may still be predicted with targets corresponding to direct branches + executed prior to the IBPB. This is fixed by the IPU 2025.1 microcode, which + should be available via distro updates. Alternatively microcode can be + obtained from Intel's github repository [#f1]_. 
+ +Affected CPUs +------------- +Below is the list of ITS affected CPUs [#f2]_ [#f3]_: + + ======================== ============ ==================== =============== + Common name Family_Model eIBRS Intra-mode BTI + Guest/Host Isolation + ======================== ============ ==================== =============== + SKYLAKE_X (step >= 6) 06_55H Affected Affected + ICELAKE_X 06_6AH Not affected Affected + ICELAKE_D 06_6CH Not affected Affected + ICELAKE_L 06_7EH Not affected Affected + TIGERLAKE_L 06_8CH Not affected Affected + TIGERLAKE 06_8DH Not affected Affected + KABYLAKE_L (step >= 12) 06_8EH Affected Affected + KABYLAKE (step >= 13) 06_9EH Affected Affected + COMETLAKE 06_A5H Affected Affected + COMETLAKE_L 06_A6H Affected Affected + ROCKETLAKE 06_A7H Not affected Affected + ======================== ============ ==================== =============== + +- All affected CPUs enumerate Enhanced IBRS feature. +- IBPB isolation is affected on all ITS affected CPUs, and needs a microcode + update for mitigation. +- None of the affected CPUs enumerate BHI_CTRL which was introduced in Golden + Cove (Alder Lake and Sapphire Rapids). This can help guests to determine the + host's affected status. +- Intel Atom CPUs are not affected by ITS. + +Mitigation +---------- +As only the indirect branches and RETs that have their last byte of instruction +in the lower half of the cacheline are vulnerable to ITS, the basic idea behind +the mitigation is to not allow indirect branches in the lower half. + +This is achieved by relying on existing retpoline support in the kernel, and in +compilers. ITS-vulnerable retpoline sites are runtime patched to point to newly +added ITS-safe thunks. These safe thunks consist of an indirect branch in the +second half of the cacheline. Not all retpoline sites are patched to thunks, if +a retpoline site is evaluated to be ITS-safe, it is replaced with an inline +indirect branch.
+ +Dynamic thunks +~~~~~~~~~~~~~~ +From a dynamically allocated pool of safe-thunks, each vulnerable site is +replaced with a new thunk, such that they get a unique address. This could +improve the branch prediction accuracy. Also, it is a defense-in-depth measure +against aliasing. + +Note, for simplicity, indirect branches in eBPF programs are always replaced +with a jump to a static thunk in __x86_indirect_its_thunk_array. If required, +in future this can be changed to use dynamic thunks. + +All vulnerable RETs are replaced with a static thunk, they do not use dynamic +thunks. This is because RETs get their prediction from RSB mostly that does not +depend on source address. RETs that underflow RSB may benefit from dynamic +thunks. But, RETs significantly outnumber indirect branches, and any benefit +from a unique source address could be outweighed by the increased icache +footprint and iTLB pressure. + +Retpoline +~~~~~~~~~ +Retpoline sequence also mitigates ITS-unsafe indirect branches. For this +reason, when retpoline is enabled, ITS mitigation only relocates the RETs to +safe thunks. Unless user requested the RSB-stuffing mitigation. + +RSB Stuffing +~~~~~~~~~~~~ +RSB-stuffing via Call Depth Tracking is a mitigation for Retbleed RSB-underflow +attacks. And it also mitigates RETs that are vulnerable to ITS. + +Mitigation in guests +^^^^^^^^^^^^^^^^^^^^ +All guests deploy ITS mitigation by default, irrespective of eIBRS enumeration +and Family/Model of the guest. This is because eIBRS feature could be hidden +from a guest. One exception to this is when a guest enumerates BHI_DIS_S, which +indicates that the guest is running on an unaffected host. + +To prevent guests from unnecessarily deploying the mitigation on unaffected +platforms, Intel has defined ITS_NO bit(62) in MSR IA32_ARCH_CAPABILITIES. When +a guest sees this bit set, it should not enumerate the ITS bug. 
Note, this bit +is not set by any hardware, but is **intended for VMMs to synthesize** it for +guests as per the host's affected status. + +Mitigation options +^^^^^^^^^^^^^^^^^^ +The ITS mitigation can be controlled using the "indirect_target_selection" +kernel parameter. The available options are: + + ======== =================================================================== + on (default) Deploy the "Aligned branch/return thunks" mitigation. + If spectre_v2 mitigation enables retpoline, aligned-thunks are only + deployed for the affected RET instructions. Retpoline mitigates + indirect branches. + + off Disable ITS mitigation. + + vmexit Equivalent to "=on" if the CPU is affected by guest/host isolation + part of ITS. Otherwise, mitigation is not deployed. This option is + useful when host userspace is not in the threat model, and only + attacks from guest to host are considered. + + stuff Deploy RSB-fill mitigation when retpoline is also deployed. + Otherwise, deploy the default mitigation. When retpoline mitigation + is enabled, RSB-stuffing via Call-Depth-Tracking also mitigates + ITS. + + force Force the ITS bug and deploy the default mitigation. + ======== =================================================================== + +Sysfs reporting +--------------- + +The sysfs file showing ITS mitigation status is: + + /sys/devices/system/cpu/vulnerabilities/indirect_target_selection + +Note, microcode mitigation status is not reported in this file. + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - Vulnerable + - System is vulnerable and no mitigation has been applied. + * - Vulnerable, KVM: Not affected + - System is vulnerable to intra-mode BTI, but not affected by eIBRS + guest/host isolation. + * - Mitigation: Aligned branch/return thunks + - The mitigation is enabled, affected indirect branches and RETs are + relocated to safe thunks. 
+ * - Mitigation: Retpolines, Stuffing RSB + - The mitigation is enabled using retpoline and RSB stuffing. + +References +---------- +.. [#f1] Microcode repository - https://github.com/intel/Intel-Linux-Processor-Microcode-Data-Files + +.. [#f2] Affected Processors list - https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html + +.. [#f3] Affected Processors list (machine readable) - https://github.com/intel/Intel-affected-processor-list diff --git a/Documentation/admin-guide/hw-vuln/rsb.rst b/Documentation/admin-guide/hw-vuln/rsb.rst new file mode 100644 index 00000000000000..21dbf9cf25f8bd --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/rsb.rst @@ -0,0 +1,268 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +RSB-related mitigations +======================= + +.. warning:: + Please keep this document up-to-date, otherwise you will be + volunteered to update it and convert it to a very long comment in + bugs.c! + +Since 2018 there have been many Spectre CVEs related to the Return Stack +Buffer (RSB) (sometimes referred to as the Return Address Stack (RAS) or +Return Address Predictor (RAP) on AMD). + +Information about these CVEs and how to mitigate them is scattered +amongst a myriad of microarchitecture-specific documents. + +This document attempts to consolidate all the relevant information in +one place and clarify the reasoning behind the current RSB-related +mitigations. It's meant to be as concise as possible, focused only on +the current kernel mitigations: what are the RSB-related attack vectors +and how are they currently being mitigated? + +It's *not* meant to describe how the RSB mechanism operates or how the +exploits work. More details about those can be found in the references +below. + +Rather, this is basically a glorified comment, but too long to actually +be one.
So when the next CVE comes along, a kernel developer can +quickly refer to this as a refresher to see what we're actually doing +and why. + +At a high level, there are two classes of RSB attacks: RSB poisoning +(Intel and AMD) and RSB underflow (Intel only). They must each be +considered individually for each attack vector (and microarchitecture +where applicable). + +---- + +RSB poisoning (Intel and AMD) +============================= + +SpectreRSB +~~~~~~~~~~ + +RSB poisoning is a technique used by SpectreRSB [#spectre-rsb]_ where +an attacker poisons an RSB entry to cause a victim's return instruction +to speculate to an attacker-controlled address. This can happen when +there are unbalanced CALLs/RETs after a context switch or VMEXIT. + +* All attack vectors can potentially be mitigated by flushing out any + poisoned RSB entries using an RSB filling sequence + [#intel-rsb-filling]_ [#amd-rsb-filling]_ when transitioning between + untrusted and trusted domains. But this has a performance impact and + should be avoided whenever possible. + + .. DANGER:: + **FIXME**: Currently we're flushing 32 entries. However, some CPU + models have more than 32 entries. The loop count needs to be + increased for those. More detailed information is needed about RSB + sizes. + +* On context switch, the user->user mitigation requires ensuring the + RSB gets filled or cleared whenever IBPB gets written [#cond-ibpb]_ + during a context switch: + + * AMD: + On Zen 4+, IBPB (or SBPB [#amd-sbpb]_ if used) clears the RSB. + This is indicated by IBPB_RET in CPUID [#amd-ibpb-rsb]_. + + On Zen < 4, the RSB filling sequence [#amd-rsb-filling]_ must + always be done in addition to IBPB [#amd-ibpb-no-rsb]_. This is + indicated by X86_BUG_IBPB_NO_RET. + + * Intel: + IBPB always clears the RSB: + + "Software that executed before the IBPB command cannot control + the predicted targets of indirect branches executed after the + command on the same logical processor.
The term indirect branch + in this context includes near return instructions, so these + predicted targets may come from the RSB." [#intel-ibpb-rsb]_ + +* On context switch, user->kernel attacks are prevented by SMEP. User + space can only insert user space addresses into the RSB. Even + non-canonical addresses can't be inserted due to the page gap at the + end of the user canonical address space reserved by TASK_SIZE_MAX. + A SMEP #PF at instruction fetch prevents the kernel from speculatively + executing user space. + + * AMD: + "Finally, branches that are predicted as 'ret' instructions get + their predicted targets from the Return Address Predictor (RAP). + AMD recommends software use a RAP stuffing sequence (mitigation + V2-3 in [2]) and/or Supervisor Mode Execution Protection (SMEP) + to ensure that the addresses in the RAP are safe for + speculation. Collectively, we refer to these mitigations as "RAP + Protection"." [#amd-smep-rsb]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits." [#intel-smep-rsb]_ + +* On VMEXIT, guest->host attacks are mitigated by eIBRS (and PBRSB + mitigation if needed): + + * AMD: + "When Automatic IBRS is enabled, the internal return address + stack used for return address predictions is cleared on VMEXIT." + [#amd-eibrs-vmexit]_ + + * Intel: + "On processors with enhanced IBRS, an RSB overwrite sequence may + not suffice to prevent the predicted target of a near return + from using an RSB entry created in a less privileged predictor + mode. Software can prevent this by enabling SMEP (for + transitions from user mode to supervisor mode) and by having + IA32_SPEC_CTRL.IBRS set during VM exits. 
Processors with + enhanced IBRS still support the usage model where IBRS is set + only in the OS/VMM for OSes that enable SMEP. To do this, such + processors will ensure that guest behavior cannot control the + RSB after a VM exit once IBRS is set, even if IBRS was not set + at the time of the VM exit." [#intel-eibrs-vmexit]_ + + Note that some Intel CPUs are susceptible to Post-barrier Return + Stack Buffer Predictions (PBRSB) [#intel-pbrsb]_, where the last + CALL from the guest can be used to predict the first unbalanced RET. + In this case the PBRSB mitigation is needed in addition to eIBRS. + +AMD RETBleed / SRSO / Branch Type Confusion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +On AMD, poisoned RSB entries can also be created by the AMD RETBleed +variant [#retbleed-paper]_ [#amd-btc]_ or by Speculative Return Stack +Overflow [#amd-srso]_ (Inception [#inception-paper]_). The kernel +protects itself by replacing every RET in the kernel with a branch to a +single safe RET. + +---- + +RSB underflow (Intel only) +========================== + +RSB Alternate (RSBA) ("Intel Retbleed") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some Intel Skylake-generation CPUs are susceptible to the Intel variant +of RETBleed [#retbleed-paper]_ (Return Stack Buffer Underflow +[#intel-rsbu]_). If a RET is executed when the RSB buffer is empty due +to mismatched CALLs/RETs or returning from a deep call stack, the branch +predictor can fall back to using the Branch Target Buffer (BTB). If a +user forces a BTB collision then the RET can speculatively branch to a +user-controlled address. + +* Note that RSB filling doesn't fully mitigate this issue. If there + are enough unbalanced RETs, the RSB may still underflow and fall back + to using a poisoned BTB entry. 
+ +* On context switch, user->user underflow attacks are mitigated by the + conditional IBPB [#cond-ibpb]_ on context switch which effectively + clears the BTB: + + * "The indirect branch predictor barrier (IBPB) is an indirect branch + control mechanism that establishes a barrier, preventing software + that executed before the barrier from controlling the predicted + targets of indirect branches executed after the barrier on the same + logical processor." [#intel-ibpb-btb]_ + +* On context switch and VMEXIT, user->kernel and guest->host RSB + underflows are mitigated by IBRS or eIBRS: + + * "Enabling IBRS (including enhanced IBRS) will mitigate the "RSBU" + attack demonstrated by the researchers. As previously documented, + Intel recommends the use of enhanced IBRS, where supported. This + includes any processor that enumerates RRSBA but not RRSBA_DIS_S." + [#intel-rsbu]_ + + However, note that eIBRS and IBRS do not mitigate intra-mode attacks. + Like RRSBA below, this is mitigated by clearing the BHB on kernel + entry. + + As an alternative to classic IBRS, call depth tracking (combined with + retpolines) can be used to track kernel returns and fill the RSB when + it gets close to being empty. + +Restricted RSB Alternate (RRSBA) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some newer Intel CPUs have Restricted RSB Alternate (RRSBA) behavior, +which, similar to RSBA described above, also falls back to using the BTB +on RSB underflow. The only difference is that the predicted targets are +restricted to the current domain when eIBRS is enabled: + +* "Restricted RSB Alternate (RRSBA) behavior allows alternate branch + predictors to be used by near RET instructions when the RSB is + empty. When eIBRS is enabled, the predicted targets of these + alternate predictors are restricted to those belonging to the + indirect branch predictor entries of the current prediction domain."
+ [#intel-eibrs-rrsba]_ + +When a CPU with RRSBA is vulnerable to Branch History Injection +[#bhi-paper]_ [#intel-bhi]_, an RSB underflow could be used for an +intra-mode BTI attack. This is mitigated by clearing the BHB on +kernel entry. + +However if the kernel uses retpolines instead of eIBRS, it needs to +disable RRSBA: + +* "Where software is using retpoline as a mitigation for BHI or + intra-mode BTI, and the processor both enumerates RRSBA and + enumerates RRSBA_DIS controls, it should disable this behavior." + [#intel-retpoline-rrsba]_ + +---- + +References +========== + +.. [#spectre-rsb] `Spectre Returns! Speculation Attacks using the Return Stack Buffer `_ + +.. [#intel-rsb-filling] "Empty RSB Mitigation on Skylake-generation" in `Retpoline: A Branch Target Injection Mitigation `_ + +.. [#amd-rsb-filling] "Mitigation V2-3" in `Software Techniques for Managing Speculation `_ + +.. [#cond-ibpb] Whether IBPB is written depends on whether the prev and/or next task is protected from Spectre attacks. It typically requires opting in per task or system-wide. For more details see the documentation for the ``spectre_v2_user`` cmdline option in Documentation/admin-guide/kernel-parameters.txt. + +.. [#amd-sbpb] IBPB without flushing of branch type predictions. Only exists for AMD. + +.. [#amd-ibpb-rsb] "Function 8000_0008h -- Processor Capacity Parameters and Extended Feature Identification" in `AMD64 Architecture Programmer's Manual Volume 3: General-Purpose and System Instructions `_. SBPB behaves the same way according to `this email `_. + +.. [#amd-ibpb-no-rsb] `Spectre Attacks: Exploiting Speculative Execution `_ + +.. [#intel-ibpb-rsb] "Introduction" in `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#amd-smep-rsb] "Existing Mitigations" in `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#intel-smep-rsb] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. 
[#amd-eibrs-vmexit] "Extended Feature Enable Register (EFER)" in `AMD64 Architecture Programmer's Manual Volume 2: System Programming `_ + +.. [#intel-eibrs-vmexit] "Enhanced IBRS" in `Indirect Branch Restricted Speculation `_ + +.. [#intel-pbrsb] `Post-barrier Return Stack Buffer Predictions / CVE-2022-26373 / INTEL-SA-00706 `_ + +.. [#retbleed-paper] `RETBleed: Arbitrary Speculative Code Execution with Return Instructions `_ + +.. [#amd-btc] `Technical Guidance for Mitigating Branch Type Confusion `_ + +.. [#amd-srso] `Technical Update Regarding Speculative Return Stack Overflow `_ + +.. [#inception-paper] `Inception: Exposing New Attack Surfaces with Training in Transient Execution `_ + +.. [#intel-rsbu] `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#intel-ibpb-btb] `Indirect Branch Predictor Barrier `_ + +.. [#intel-eibrs-rrsba] "Guidance for RSBU" in `Return Stack Buffer Underflow / Return Stack Buffer Underflow / CVE-2022-29901, CVE-2022-28693 / INTEL-SA-00702 `_ + +.. [#bhi-paper] `Branch History Injection: On the Effectiveness of Hardware Mitigations Against Cross-Privilege Spectre-v2 Attacks `_ + +.. [#intel-bhi] `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ + +..
[#intel-retpoline-rrsba] "Retpoline" in `Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 `_ diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 76e538c77e3161..c865280efa8ebb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1407,18 +1407,15 @@ earlyprintk=serial[,0x...[,baudrate]] earlyprintk=ttySn[,baudrate] earlyprintk=dbgp[debugController#] + earlyprintk=mmio32,membase[,{nocfg|baudrate}] earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}] earlyprintk=xdbc[xhciController#] earlyprintk=bios - earlyprintk=mmio,membase[,{nocfg|baudrate}] earlyprintk is useful when the kernel crashes before the normal console is initialized. It is not enabled by default because it has some cosmetic problems. - Only 32-bit memory addresses are supported for "mmio" - and "pciserial" devices. - Use "nocfg" to skip UART configuration, assume BIOS/firmware has configured UART correctly. @@ -2205,6 +2202,23 @@ different crypto accelerators. This option can be used to achieve best performance for particular HW. + indirect_target_selection= [X86,Intel] Mitigation control for Indirect + Target Selection(ITS) bug in Intel CPUs. Updated + microcode is also required for a fix in IBPB. + + on: Enable mitigation (default). + off: Disable mitigation. + force: Force the ITS bug and deploy default + mitigation. + vmexit: Only deploy mitigation if CPU is affected by + guest/host isolation part of ITS. + stuff: Deploy RSB-fill mitigation when retpoline is + also deployed. Otherwise, deploy the default + mitigation. 
+ + For details see: + Documentation/admin-guide/hw-vuln/indirect-target-selection.rst + init= [KNL] Format: Run specified binary instead of /sbin/init as init @@ -2304,6 +2318,9 @@ disable Do not enable intel_pstate as the default scaling driver for the supported processors + enable + Enable intel_pstate in case "disable" was passed + previously in the kernel boot parameters active Use intel_pstate driver to bypass the scaling governors layer of cpufreq and provides it own @@ -3696,6 +3713,7 @@ expose users to several CPU vulnerabilities. Equivalent to: if nokaslr then kpti=0 [ARM64] gather_data_sampling=off [X86] + indirect_target_selection=off [X86] kvm.nx_huge_pages=off [X86] l1tf=off [X86] mds=off [X86] @@ -4674,6 +4692,15 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + pcie_acs_override = + [PCIE] Override missing PCIe ACS support for: + downstream + All downstream ports - full ACS capabilities + multifunction + All multifunction devices - multifunction ACS subset + id:nnnn:nnnn + Specific device - full ACS capabilities + Specified as vid:did (vendor/device ID) in hex noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 8290177b4f7589..2a94faa5a4cae3 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -25,6 +25,9 @@ files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: - admin_reserve_kbytes +- anon_min_ratio +- clean_low_ratio +- clean_min_ratio - compact_memory - compaction_proactiveness - compact_unevictable_allowed @@ -109,6 +112,67 @@ On x86_64 this is about 128MB. Changing this takes effect whenever an application requests memory.
+anon_min_ratio +============== + +This knob provides *hard* protection of anonymous pages. The anonymous pages +on the current node won't be reclaimed under any conditions when their amount +is below vm.anon_min_ratio. + +This knob may be used to prevent excessive swap thrashing when anonymous +memory is low (for example, when memory is going to be overfilled by +compressed data of the zram module). + +Setting this value too high (close to 100) can result in inability to +swap and can lead to early OOM under memory pressure. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 1. + + +clean_low_ratio +================ + +This knob provides *best-effort* protection of clean file pages. The file pages +on the current node won't be reclaimed under memory pressure when the amount of +clean file pages is below vm.clean_low_ratio *unless* we threaten to OOM. + +Protection of clean file pages using this knob may be used when swapping is +still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory pressure. + +Setting it to a high value may result in an early eviction of anonymous pages +into the swap space by attempting to hold the protected amount of clean file +pages in memory. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 15. + + +clean_min_ratio +================ + +This knob provides *hard* protection of clean file pages. The file pages on the +current node won't be reclaimed under memory pressure when the amount of clean +file pages is below vm.clean_min_ratio. + +Hard protection of clean file pages using this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free swap space; + - improve performance in disk cache-bound tasks under memory pressure; + - avoid high latency and prevent livelock in near-OOM conditions.
+ +Setting it to a high value may result in an early out-of-memory condition due to +the inability to reclaim the protected amount of clean file pages when other +types of pages cannot be reclaimed. + +The unit of measurement is the percentage of the total memory of the node. + +The default value is 4. + + compact_memory ============== @@ -973,6 +1037,14 @@ be 133 (x + 2x = 200, 2x = 133.33). At 0, the kernel will not initiate swap until the amount of free and file-backed pages is less than the high watermark in a zone. +This knob has no effect if the amount of clean file pages on the current +node is below vm.clean_low_ratio or vm.clean_min_ratio. In this case, +only anonymous pages can be reclaimed. + +If the number of anonymous pages on the current node is below +vm.anon_min_ratio, then only file pages can be reclaimed with +any vm.swappiness value. + unprivileged_userfaultfd ======================== diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index b67772cf36d6dc..5becb441c3cba0 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -124,6 +124,14 @@ When mounting an XFS filesystem, the following options are accepted. controls the size of each buffer and so is also relevant to this case. + lifetime (default) or nolifetime + Enable data placement based on write life time hints provided + by the user. This turns on co-allocation of data of similar + life times when statistically favorable to reduce garbage + collection cost. + + These options are only available for zoned rt file systems. + logbsize=value Set the size of each in-memory log buffer. The size may be specified in bytes, or in kilobytes with a "k" suffix. @@ -143,6 +151,14 @@ When mounting an XFS filesystem, the following options are accepted. optional, and the log section can be separate from the data section or contained within it.
+ max_open_zones=value + Specify the max number of zones to keep open for writing on a + zoned rt device. Many open zones aid file data separation + but may impact performance on HDDs. + + If ``max_open_zones`` is not specified, the value is determined + by the capabilities and the size of the zoned rt device. + noalign Data allocations will not be aligned at stripe unit boundaries. This is only relevant to filesystems created @@ -542,3 +558,24 @@ The interesting knobs for XFS workqueues are as follows: nice Relative priority of scheduling the threads. These are the same nice levels that can be applied to userspace processes. ============ =========== + +Zoned Filesystems +================= + +For zoned file systems, the following attributes are exposed in: + + /sys/fs/xfs/<dev>/zoned/ + + max_open_zones (Min: 1 Default: Varies Max: UINTMAX) + This read-only attribute exposes the maximum number of open zones + available for data placement. The value is determined at mount time and + is limited by the capabilities of the backing zoned device, file system + size and the max_open_zones mount option. + + zonegc_low_space (Min: 0 Default: 0 Max: 100) + Define the percentage of the unused space that GC should keep + available for writing. A high value will reclaim more of the space + occupied by unused blocks, creating a larger buffer against write + bursts at the cost of increased write amplification. Regardless + of this value, garbage collection will always aim to free a minimum + amount of blocks to keep max_open_zones open for data placement purposes. diff --git a/Documentation/arch/openrisc/openrisc_port.rst b/Documentation/arch/openrisc/openrisc_port.rst index 1565b9546e38d7..a8f307a3b499cf 100644 --- a/Documentation/arch/openrisc/openrisc_port.rst +++ b/Documentation/arch/openrisc/openrisc_port.rst @@ -7,10 +7,10 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k).
For information about OpenRISC processors and ongoing development: - ======= ============================= + ======= ============================== website https://openrisc.io - email openrisc@lists.librecores.org - ======= ============================= + email linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -27,11 +27,11 @@ Toolchain binaries can be obtained from openrisc.io or our github releases page. Instructions for building the different toolchains can be found on openrisc.io or Stafford's toolchain build and release scripts. - ========== ================================================= - binaries https://github.com/openrisc/or1k-gcc/releases + ========== ========================================================== + binaries https://github.com/stffrdhrn/or1k-toolchain-build/releases toolchains https://openrisc.io/software building https://github.com/stffrdhrn/or1k-toolchain-build - ========== ================================================= + ========== ========================================================== 2) Building diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 53607d962653b6..f60bf599175597 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -51,7 +51,7 @@ The following keys are defined: * :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as defined by the RISC-V privileged architecture specification. -* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as +* :c:macro:`RISCV_HWPROBE_KEY_MIMPID`: Contains the value of ``mimpid``, as defined by the RISC-V privileged architecture specification. 
* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base diff --git a/Documentation/arch/x86/cpuinfo.rst b/Documentation/arch/x86/cpuinfo.rst index 6ef426a52cdc97..f80e2a558d2a60 100644 --- a/Documentation/arch/x86/cpuinfo.rst +++ b/Documentation/arch/x86/cpuinfo.rst @@ -79,8 +79,9 @@ feature flags. How are feature flags created? ============================== -a: Feature flags can be derived from the contents of CPUID leaves. ------------------------------------------------------------------- +Feature flags can be derived from the contents of CPUID leaves +-------------------------------------------------------------- + These feature definitions are organized mirroring the layout of CPUID leaves and grouped in words with offsets as mapped in enum cpuid_leafs in cpufeatures.h (see arch/x86/include/asm/cpufeatures.h for details). @@ -89,8 +90,9 @@ cpufeatures.h, and if it is detected at run time, the flags will be displayed accordingly in /proc/cpuinfo. For example, the flag "avx2" comes from X86_FEATURE_AVX2 in cpufeatures.h. -b: Flags can be from scattered CPUID-based features. ----------------------------------------------------- +Flags can be from scattered CPUID-based features +------------------------------------------------ + Hardware features enumerated in sparsely populated CPUID leaves get software-defined values. Still, CPUID needs to be queried to determine if a given feature is present. This is done in init_scattered_cpuid_features(). @@ -104,8 +106,9 @@ has only one feature and would waste 31 bits of space in the x86_capability[] array. Since there is a struct cpuinfo_x86 for each possible CPU, the wasted memory is not trivial. -c: Flags can be created synthetically under certain conditions for hardware features. 
-------------------------------------------------------------------------------------- +Flags can be created synthetically under certain conditions for hardware features +--------------------------------------------------------------------------------- + Examples of conditions include whether certain features are present in MSR_IA32_CORE_CAPS or specific CPU models are identified. If the needed conditions are met, the features are enabled by the set_cpu_cap or @@ -114,8 +117,8 @@ the feature X86_FEATURE_SPLIT_LOCK_DETECT will be enabled and "split_lock_detect" will be displayed. The flag "ring3mwait" will be displayed only when running on INTEL_XEON_PHI_[KNL|KNM] processors. -d: Flags can represent purely software features. ------------------------------------------------- +Flags can represent purely software features +-------------------------------------------- These flags do not represent hardware features. Instead, they represent a software feature implemented in the kernel. For example, Kernel Page Table Isolation is purely software feature and its feature flag X86_FEATURE_PTI is @@ -130,14 +133,18 @@ x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming of flags in the x86_cap/bug_flags[] are as follows: -a: The name of the flag is from the string in X86_FEATURE_ by default. ----------------------------------------------------------------------------- -By default, the flag in /proc/cpuinfo is extracted from the respective -X86_FEATURE_ in cpufeatures.h. For example, the flag "avx2" is from -X86_FEATURE_AVX2. +Flags do not appear by default in /proc/cpuinfo +----------------------------------------------- + +Feature flags are omitted by default from /proc/cpuinfo as it does not make +sense for the feature to be exposed to userspace in most cases. 
For example, +X86_FEATURE_ALWAYS is defined in cpufeatures.h but that flag is an internal +kernel feature used in the alternative runtime patching functionality. So the +flag does not appear in /proc/cpuinfo. + +Specify a flag name if absolutely needed +---------------------------------------- -b: The naming can be overridden. --------------------------------- If the comment on the line for the #define X86_FEATURE_* starts with a double-quote character (""), the string inside the double-quote characters will be the name of the flags. For example, the flag "sse4_1" comes from @@ -148,36 +155,31 @@ needed. For instance, /proc/cpuinfo is a userspace interface and must remain constant. If, for some reason, the naming of X86_FEATURE_ changes, one shall override the new naming with the name already used in /proc/cpuinfo. -c: The naming override can be "", which means it will not appear in /proc/cpuinfo. ----------------------------------------------------------------------------------- -The feature shall be omitted from /proc/cpuinfo if it does not make sense for -the feature to be exposed to userspace. For example, X86_FEATURE_ALWAYS is -defined in cpufeatures.h but that flag is an internal kernel feature used -in the alternative runtime patching functionality. So, its name is overridden -with "". Its flag will not appear in /proc/cpuinfo. - Flags are missing when one or more of these happen ================================================== -a: The hardware does not enumerate support for it. --------------------------------------------------- +The hardware does not enumerate support for it +---------------------------------------------- + For example, when a new kernel is running on old hardware or the feature is not enabled by boot firmware. Even if the hardware is new, there might be a problem enabling the feature at run time, the flag will not be displayed. -b: The kernel does not know about the flag. 
-------------------------------------------- +The kernel does not know about the flag +--------------------------------------- + For example, when an old kernel is running on new hardware. -c: The kernel disabled support for it at compile-time. ------------------------------------------------------- +The kernel disabled support for it at compile-time +-------------------------------------------------- + For example, if 5-level-paging is not enabled when building (i.e., CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_. Even though the feature will still be detected via CPUID, the kernel disables it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57). -d: The feature is disabled at boot-time. ----------------------------------------- +The feature is disabled at boot-time +------------------------------------ A feature can be disabled either using a command-line parameter or because it failed to be enabled. The command-line parameter clearcpuid= can be used to disable features using the feature number as defined in @@ -190,8 +192,9 @@ disable specific features. The list of parameters includes, but is not limited to, nofsgsbase, nosgx, noxsave, etc. 5-level paging can also be disabled using "no5lvl". -e: The feature was known to be non-functional. ----------------------------------------------- +The feature was known to be non-functional +------------------------------------------ + The feature was known to be non-functional because a dependency was missing at runtime. For example, AVX flags will not show up if XSAVE feature is disabled since they depend on XSAVE feature. 
Another example would be broken diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index de27e1620821c4..0acb4c9b8d90f3 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -382,6 +382,14 @@ In case of new BPF instructions, once the changes have been accepted into the Linux kernel, please implement support into LLVM's BPF back end. See LLVM_ section below for further information. +Q: What is the "BPF_INTERNAL" symbol namespace for? +--------------------------------------------------- +A: Symbols exported as BPF_INTERNAL can only be used by BPF infrastructure +like preload kernel modules with light skeleton. Most symbols outside +of BPF_INTERNAL are not expected to be used by code outside of BPF either. +Symbols may lack the designation because they predate the namespaces, +or due to an oversight. + Stable submission ================= diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml index 6ad78429dc7467..c92341888a2880 100644 --- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml +++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Ceva AHCI SATA Controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey description: | diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml index 350fb8f400f022..5952e6448ed47e 100644 --- a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml +++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml @@ -111,11 +111,27 @@ properties: unevaluatedProperties: false port@1: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: DSI output port node to the panel or the next bridge in the chain + properties: +
endpoint: + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + + properties: + data-lanes: + description: array of physical DSI data lane indexes. + minItems: 1 + items: + - const: 1 + - const: 2 + - const: 3 + - const: 4 + required: - port@0 - port@1 diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml index bb93baa888794b..e13e9d6dd148ae 100644 --- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml +++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml @@ -12,7 +12,6 @@ description: PS_MODE). Every pin can be configured as input/output. maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml index 517a4ac1bea3df..e365413732e7b9 100644 --- a/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml +++ b/Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Mediatek's Keypad Controller maintainers: - - Mattijs Korpershoek + - Mattijs Korpershoek allOf: - $ref: /schemas/input/matrix-keymap.yaml# diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml index 6076ddf56bb5af..c49688be105819 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml @@ -19,6 +19,7 @@ properties: - fsl,imx8mp-irqsteer - fsl,imx8qm-irqsteer - fsl,imx8qxp-irqsteer + - fsl,imx94-irqsteer - const: fsl,imx-irqsteer reg: diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml 
index e0ec53bc10c6f9..1525a50ded476f 100644 --- a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/can/microchip,mcp2510.yaml# +$id: http://devicetree.org/schemas/net/can/microchip,mcp2510.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: Microchip MCP251X stand-alone CAN controller diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 45819b2358002b..a2d4c626f659a5 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -74,19 +74,17 @@ properties: - rev-rmii - moca - # RX and TX delays are added by the MAC when required + # RX and TX delays are provided by the PCB. See below - rgmii - # RGMII with internal RX and TX delays provided by the PHY, - # the MAC should not add the RX or TX delays in this case + # RX and TX delays are not provided by the PCB. This is the most + # frequent case. See below - rgmii-id - # RGMII with internal RX delay provided by the PHY, the MAC - # should not add an RX delay in this case + # TX delay is provided by the PCB. See below - rgmii-rxid - # RGMII with internal TX delay provided by the PHY, the MAC - # should not add an TX delay in this case + # RX delay is provided by the PCB. See below - rgmii-txid - rtbi - smii @@ -286,4 +284,89 @@ allOf: additionalProperties: true +# Informative +# =========== +# +# 'phy-modes' & 'phy-connection-type' properties 'rgmii', 'rgmii-id', +# 'rgmii-rxid', and 'rgmii-txid' are frequently used wrongly by +# developers. This informative section clarifies their usage. +# +# The RGMII specification requires a 2ns delay between the data and +# clock signals on the RGMII bus. 
How this delay is implemented is not +# specified. +# +# One option is to make the clock traces on the PCB longer than the +# data traces. A sufficient difference in length can provide the 2ns +# delay. If both the RX and TX delays are implemented in this manner, +# 'rgmii' should be used, so indicating the PCB adds the delays. +# +# If the PCB does not add these delays via extra long traces, +# 'rgmii-id' should be used. Here, 'id' refers to 'internal delay', +# where either the MAC or PHY adds the delay. +# +# If only one of the two delays is implemented via extra long clock +# lines, either 'rgmii-rxid' or 'rgmii-txid' should be used, +# indicating the MAC or PHY should implement one of the delays +# internally, while the PCB implements the other delay. +# +# Device Tree describes hardware, and in this case, it describes the +# PCB between the MAC and the PHY, if the PCB implements delays or +# not. +# +# In practice, very few PCBs make use of extra long clock lines. Hence +# any RGMII phy mode other than 'rgmii-id' is probably wrong, and is +# unlikely to be accepted during review without details provided in +# the commit description and comments in the .dts file. +# +# When the PCB does not implement the delays, the MAC or PHY must. As +# such, this is software configuration, and so not described in Device +# Tree. +# +# The following describes how Linux implements the configuration of +# the MAC and PHY to add these delays when the PCB does not. As stated +# above, developers often get this wrong, and the aim of this section +# is to reduce the frequency of these errors by Linux developers. Other +# users of the Device Tree may implement it differently, and still be +# consistent with both the normative and informative description +# above. +# +# By default in Linux, when using phylib/phylink, the MAC is expected +# to read the 'phy-mode' from Device Tree, not implement any delays, +# and pass the value to the PHY.
The PHY will then implement delays as +# specified by the 'phy-mode'. The PHY should always be reconfigured +# to implement the needed delays, replacing any setting performed by +# strapping or the bootloader, etc. +# +# Experience to date is that all PHYs which implement RGMII also +# implement the ability to add or not add the needed delays. Hence +# this default is expected to work in all cases. Ignoring this default +# is likely to be questioned by Reviewers, and require a strong argument +# to be accepted. +# +# There are a small number of cases where the MAC has hard coded +# delays which cannot be disabled. The 'phy-mode' only describes the +# PCB. The inability to disable the delays in the MAC does not change +# the meaning of 'phy-mode'. It does however mean that a 'phy-mode' of +# 'rgmii' is now invalid, it cannot be supported, since both the PCB +# and the MAC and PHY adding delays cannot result in a functional +# link. Thus the MAC should report a fatal error for any modes which +# cannot be supported. When the MAC implements the delay, it must +# ensure that the PHY does not also implement the same delay. So it +# must modify the phy-mode it passes to the PHY, removing the delay it +# has added. Failure to remove the delay will result in a +# non-functioning link. +# +# Sometimes there is a need to fine tune the delays. Often the MAC or +# PHY can perform this fine tuning. In the MAC node, the Device Tree +# properties 'rx-internal-delay-ps' and 'tx-internal-delay-ps' should +# be used to indicate fine tuning performed by the MAC. The values +# expected here are small. A value of 2000ps, i.e. 2ns, and a phy-mode +# of 'rgmii' will not be accepted by Reviewers. +# +# If the PHY is to perform fine tuning, the properties +# 'rx-internal-delay-ps' and 'tx-internal-delay-ps' in the PHY node +# should be used. When the PHY is implementing delays, e.g. 'rgmii-id' +# these properties should have a value near to 2000ps. If the PCB is +# implementing delays, e.g.
'rgmii', a small value can be used to fine +# tune the delay added by the PCB. ... diff --git a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml index 8b3826243dddfc..38e3ad50ff4fb6 100644 --- a/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml +++ b/Documentation/devicetree/bindings/nvmem/layouts/fixed-cell.yaml @@ -27,7 +27,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array items: - minimum: 0 - maximum: 7 + maximum: 31 description: Offset in bit within the address range specified by reg. - minimum: 1 diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index 39c209249c9c0b..3f6dc6a3a9f1ad 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -19,6 +19,7 @@ properties: - enum: - qcom,apq8064-qfprom - qcom,apq8084-qfprom + - qcom,ipq5018-qfprom - qcom,ipq5332-qfprom - qcom,ipq5424-qfprom - qcom,ipq6018-qfprom @@ -28,6 +29,8 @@ properties: - qcom,msm8226-qfprom - qcom,msm8916-qfprom - qcom,msm8917-qfprom + - qcom,msm8937-qfprom + - qcom,msm8960-qfprom - qcom,msm8974-qfprom - qcom,msm8976-qfprom - qcom,msm8996-qfprom @@ -51,6 +54,7 @@ properties: - qcom,sm8450-qfprom - qcom,sm8550-qfprom - qcom,sm8650-qfprom + - qcom,x1e80100-qfprom - const: qcom,qfprom reg: diff --git a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml index a44d44b328091d..dc89020b095067 100644 --- a/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml +++ b/Documentation/devicetree/bindings/nvmem/rockchip,otp.yaml @@ -14,6 +14,7 @@ properties: enum: - rockchip,px30-otp - rockchip,rk3308-otp + - rockchip,rk3576-otp - rockchip,rk3588-otp reg: @@ -62,12 +63,34 @@ allOf: properties: clocks: maxItems: 3 + clock-names: + maxItems: 3 resets: maxItems: 1 reset-names: 
items: - const: phy + - if: + properties: + compatible: + contains: + enum: + - rockchip,rk3576-otp + then: + properties: + clocks: + maxItems: 3 + clock-names: + maxItems: 3 + resets: + minItems: 2 + maxItems: 2 + reset-names: + items: + - const: otp + - const: apb + - if: properties: compatible: @@ -78,6 +101,8 @@ allOf: properties: clocks: minItems: 4 + clock-names: + minItems: 4 resets: minItems: 3 reset-names: diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index daee0c0fc91539..c468207eb95168 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -63,8 +63,7 @@ properties: fsl,phy-tx-vboost-level-microvolt: description: Adjust the boosted transmit launch pk-pk differential amplitude - minimum: 880 - maximum: 1120 + enum: [844, 1008, 1156] fsl,phy-comp-dis-tune-percent: description: diff --git a/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml b/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml index 45e112d0efb466..5575c58357d6e7 100644 --- a/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml +++ b/Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml @@ -30,11 +30,19 @@ properties: const: 3 clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 + + clock-names: + minItems: 1 + items: + - const: axi + - const: ext required: - reg - clocks + - clock-names unevaluatedProperties: false @@ -43,6 +51,7 @@ examples: pwm@44b00000 { compatible = "adi,axi-pwmgen-2.00.a"; reg = <0x44b00000 0x1000>; - clocks = <&spi_clk>; + clocks = <&fpga_clk>, <&spi_clk>; + clock-names = "axi", "ext"; #pwm-cells = <3>; }; diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml index a4dfa09344dd72..f85ee5d20ccbb3 100644 --- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml +++ 
b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml @@ -9,15 +9,6 @@ title: Renesas R-Car Timer Pulse Unit PWM Controller maintainers: - Laurent Pinchart -select: - properties: - compatible: - contains: - const: renesas,tpu - required: - - compatible - - '#pwm-cells' - properties: compatible: items: diff --git a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml index 6327bb2f6ee080..698266c09e2535 100644 --- a/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mediatek,mt6357-regulator.yaml @@ -33,7 +33,7 @@ patternProperties: "^ldo-v(camio18|aud28|aux18|io18|io28|rf12|rf18|cn18|cn28|fe28)$": type: object - $ref: fixed-regulator.yaml# + $ref: regulator.yaml# unevaluatedProperties: false description: Properties for single fixed LDO regulator. @@ -112,7 +112,6 @@ examples: regulator-enable-ramp-delay = <220>; }; mt6357_vfe28_reg: ldo-vfe28 { - compatible = "regulator-fixed"; regulator-name = "vfe28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -125,14 +124,12 @@ examples: regulator-enable-ramp-delay = <110>; }; mt6357_vrf18_reg: ldo-vrf18 { - compatible = "regulator-fixed"; regulator-name = "vrf18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <110>; }; mt6357_vrf12_reg: ldo-vrf12 { - compatible = "regulator-fixed"; regulator-name = "vrf12"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; @@ -157,14 +154,12 @@ examples: regulator-enable-ramp-delay = <264>; }; mt6357_vcn28_reg: ldo-vcn28 { - compatible = "regulator-fixed"; regulator-name = "vcn28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vcn18_reg: ldo-vcn18 { - compatible = "regulator-fixed"; regulator-name = "vcn18"; 
regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -183,7 +178,6 @@ examples: regulator-enable-ramp-delay = <264>; }; mt6357_vcamio_reg: ldo-vcamio18 { - compatible = "regulator-fixed"; regulator-name = "vcamio"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -212,28 +206,24 @@ examples: regulator-always-on; }; mt6357_vaux18_reg: ldo-vaux18 { - compatible = "regulator-fixed"; regulator-name = "vaux18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vaud28_reg: ldo-vaud28 { - compatible = "regulator-fixed"; regulator-name = "vaud28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vio28_reg: ldo-vio28 { - compatible = "regulator-fixed"; regulator-name = "vio28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-enable-ramp-delay = <264>; }; mt6357_vio18_reg: ldo-vio18 { - compatible = "regulator-fixed"; regulator-name = "vio18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml index 56ff6386534ddf..5dcc2a32c08004 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml @@ -16,6 +16,9 @@ description: properties: compatible: enum: + - qcom,sc8180x-adsp-pas + - qcom,sc8180x-cdsp-pas + - qcom,sc8180x-slpi-pas - qcom,sm8150-adsp-pas - qcom,sm8150-cdsp-pas - qcom,sm8150-mpss-pas diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml index 1f1b42dde94d50..1db85fc9966f13 100644 --- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml +++ 
b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Zynq UltraScale+ MPSoC and Versal reset maintainers: - - Mubin Sayyed - Radhey Shyam Pandey description: | diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml index 31295be910130c..234089b5954ddb 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Freescale Layerscape Reset Registers Module maintainers: - - Frank Li + - Frank Li description: Reset Module includes chip reset, service processor control and Reset Control diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml index de0b4ae740ff23..a975bce599750e 100644 --- a/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml +++ b/Documentation/devicetree/bindings/soc/fsl/fsl,qman-fqd.yaml @@ -50,7 +50,7 @@ required: - compatible allOf: - - $ref: reserved-memory.yaml + - $ref: /schemas/reserved-memory/reserved-memory.yaml unevaluatedProperties: false @@ -61,7 +61,7 @@ examples: #size-cells = <2>; qman-fqd { - compatible = "shared-dma-pool"; + compatible = "fsl,qman-fqd"; size = <0 0x400000>; alignment = <0 0x400000>; no-map; diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index bccd00a1ddd0ad..53d00ca643b318 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -56,19 +56,18 @@ properties: enum: - snps,dw-apb-ssi - snps,dwc-ssi-1.01a - - description: Microsemi Ocelot/Jaguar2 SoC SPI Controller - items: - - enum: - - mscc,ocelot-spi - - mscc,jaguar2-spi - - 
const: snps,dw-apb-ssi - description: Microchip Sparx5 SoC SPI Controller const: microchip,sparx5-spi - description: Amazon Alpine SPI Controller const: amazon,alpine-dw-apb-ssi - - description: Renesas RZ/N1 SPI Controller + - description: Vendor controllers which use snps,dw-apb-ssi as fallback items: - - const: renesas,rzn1-spi + - enum: + - mscc,ocelot-spi + - mscc,jaguar2-spi + - renesas,rzn1-spi + - sophgo,sg2042-spi + - thead,th1520-spi - const: snps,dw-apb-ssi - description: Intel Keem Bay SPI Controller const: intel,keembay-ssi @@ -88,10 +87,6 @@ properties: - renesas,r9a06g032-spi # RZ/N1D - renesas,r9a06g033-spi # RZ/N1S - const: renesas,rzn1-spi # RZ/N1 - - description: T-HEAD TH1520 SoC SPI Controller - items: - - const: thead,th1520-spi - - const: snps,dw-apb-ssi reg: minItems: 1 diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml index 891cca00952815..6b80b060672e54 100644 --- a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml +++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml @@ -18,9 +18,14 @@ description: | properties: compatible: - enum: - - nxp,imx95-sysctr-timer - - nxp,sysctr-timer + oneOf: + - enum: + - nxp,imx95-sysctr-timer + - nxp,sysctr-timer + - items: + - enum: + - nxp,imx94-sysctr-timer + - const: nxp,imx95-sysctr-timer reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml b/Documentation/devicetree/bindings/timer/renesas,tpu.yaml deleted file mode 100644 index 7a473b30277515..00000000000000 --- a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -%YAML 1.2 ---- -$id: http://devicetree.org/schemas/timer/renesas,tpu.yaml# -$schema: http://devicetree.org/meta-schemas/core.yaml# - -title: Renesas H8/300 Timer Pulse Unit - -maintainers: - - Yoshinori Sato - -description: - The TPU is a 16bit 
timer/counter with configurable clock inputs and - programmable compare match. - This implementation supports only cascade mode. - -select: - properties: - compatible: - contains: - const: renesas,tpu - '#pwm-cells': false - required: - - compatible - -properties: - compatible: - const: renesas,tpu - - reg: - items: - - description: First channel - - description: Second channel - - clocks: - maxItems: 1 - - clock-names: - const: fck - -required: - - compatible - - reg - - clocks - - clock-names - -additionalProperties: false - -examples: - - | - tpu: tpu@ffffe0 { - compatible = "renesas,tpu"; - reg = <0xffffe0 16>, <0xfffff0 12>; - clocks = <&pclk>; - clock-names = "fck"; - }; diff --git a/Documentation/devicetree/bindings/usb/cypress,hx3.yaml b/Documentation/devicetree/bindings/usb/cypress,hx3.yaml index 1033b7a4b8f953..d6eac1213228d2 100644 --- a/Documentation/devicetree/bindings/usb/cypress,hx3.yaml +++ b/Documentation/devicetree/bindings/usb/cypress,hx3.yaml @@ -14,9 +14,22 @@ allOf: properties: compatible: - enum: - - usb4b4,6504 - - usb4b4,6506 + oneOf: + - enum: + - usb4b4,6504 + - usb4b4,6506 + - items: + - enum: + - usb4b4,6500 + - usb4b4,6508 + - const: usb4b4,6504 + - items: + - enum: + - usb4b4,6502 + - usb4b4,6503 + - usb4b4,6507 + - usb4b4,650a + - const: usb4b4,6506 reg: true diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml index b5843f4d17d879..379dacacb52681 100644 --- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml +++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx SuperSpeed DWC3 USB SoC controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml index e2a72deae77601..c68c04da339982 100644 --- 
a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml @@ -17,7 +17,6 @@ description: maintainers: - Michal Simek - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml index a7f75fe366652b..f295aa9d9ee79f 100644 --- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml +++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx udc controller maintainers: - - Mubin Sayyed - Radhey Shyam Pandey properties: diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 86f6a19b28ae21..190ab40cf23afc 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -864,6 +864,8 @@ patternProperties: description: Linux-specific binding "^linx,.*": description: Linx Technologies + "^liontron,.*": + description: Shenzhen Liontron Technology Co., Ltd "^liteon,.*": description: LITE-ON Technology Corp. "^litex,.*": diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index a1eb4a11a1d0f9..1b240661bfa306 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -328,9 +328,13 @@ The ext4 superblock is laid out as follows in - s_checksum_type - Metadata checksum algorithm type. The only valid value is 1 (crc32c). * - 0x176 - - __le16 - - s_reserved_pad - - + - \_\_u8 + - s\_encryption\_level + - Versioning level for encryption. + * - 0x177 + - \_\_u8 + - s\_reserved\_pad + - Padding to next 32bits. * - 0x178 - __le64 - s_kbytes_written @@ -466,9 +470,13 @@ The ext4 superblock is laid out as follows in - s_last_error_time_hi - Upper 8 bits of the s_last_error_time field. 
* - 0x27A - - __u8 - - s_pad[2] - - Zero padding. + - \_\_u8 + - s\_first\_error\_errcode + - + * - 0x27B + - \_\_u8 + - s\_last\_error\_errcode + - * - 0x27C - __le16 - s_encoding diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index 8d8b53e96bcfee..ccb4b153e6f2dd 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -12,11 +12,14 @@ ACPI in general allows referring to device objects in the tree only. Hierarchical data extension nodes may not be referred to directly, hence this document defines a scheme to implement such references. -A reference consist of the device object name followed by one or more -hierarchical data extension [dsd-guide] keys. Specifically, the hierarchical -data extension node which is referred to by the key shall lie directly under -the parent object i.e. either the device object or another hierarchical data -extension node. +A reference to a _DSD hierarchical data node is a string consisting of a +device object reference followed by a dot (".") and a relative path to a data +node object. Do not use non-string references as this will produce a copy of +the hierarchical data node, not a reference! + +The hierarchical data extension node which is referred to shall be located +directly under its parent object i.e. either the device object or another +hierarchical data extension node [dsd-guide]. The keys in the hierarchical data nodes shall consist of the name of the node, "@" character and the number of the node in hexadecimal notation (without pre- @@ -33,11 +36,9 @@ extension key. 
Example ======= -In the ASL snippet below, the "reference" _DSD property contains a -device object reference to DEV0 and under that device object, a -hierarchical data extension key "node@1" referring to the NOD1 object -and lastly, a hierarchical data extension key "anothernode" referring to -the ANOD object which is also the final target node of the reference. +In the ASL snippet below, the "reference" _DSD property contains a string +reference to a hierarchical data extension node ANOD under DEV0 under the parent +of DEV1. ANOD is also the final target node of the reference. :: Device (DEV0) @@ -76,10 +77,7 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { - "reference", Package () { - ^DEV0, "node@1", "anothernode" - } + Package () { "reference", "^DEV0.ANOD" } }, } }) diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst index b9dbfc73ed25b6..d6ae5ffa748ca4 100644 --- a/Documentation/firmware-guide/acpi/dsd/graph.rst +++ b/Documentation/firmware-guide/acpi/dsd/graph.rst @@ -66,12 +66,9 @@ of that port shall be zero. Similarly, if a port may only have a single endpoint, the number of that endpoint shall be zero. The endpoint reference uses property extension with "remote-endpoint" property -name followed by a reference in the same package. Such references consist of -the remote device reference, the first package entry of the port data extension -reference under the device and finally the first package entry of the endpoint -data extension reference under the port. Individual references thus appear as:: +name followed by a string reference in the same package. [data-node-ref]:: - Package() { device, "port@X", "endpoint@Y" } + "device.datanode" In the above example, "X" is the number of the port and "Y" is the number of the endpoint. 
@@ -109,7 +106,7 @@ A simple example of this is show below:: ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { Package () { "reg", 0 }, - Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } }, + Package () { "remote-endpoint", "\\_SB.PCI0.ISP.EP40" }, } }) } @@ -141,7 +138,7 @@ A simple example of this is show below:: ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { Package () { "reg", 0 }, - Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } }, + Package () { "remote-endpoint", "\\_SB.PCI0.I2C2.CAM0.EP00" }, } }) } diff --git a/Documentation/firmware-guide/acpi/dsd/leds.rst b/Documentation/firmware-guide/acpi/dsd/leds.rst index 93db592c93c712..a97cd07d49be38 100644 --- a/Documentation/firmware-guide/acpi/dsd/leds.rst +++ b/Documentation/firmware-guide/acpi/dsd/leds.rst @@ -15,11 +15,6 @@ Referring to LEDs in Device tree is documented in [video-interfaces], in "flash-leds" property documentation. In short, LEDs are directly referred to by using phandles. -While Device tree allows referring to any node in the tree [devicetree], in -ACPI references are limited to device nodes only [acpi]. For this reason using -the same mechanism on ACPI is not possible. A mechanism to refer to non-device -ACPI nodes is documented in [data-node-ref]. - ACPI allows (as does DT) using integer arguments after the reference. A combination of the LED driver device reference and an integer argument, referring to the "reg" property of the relevant LED, is used to identify @@ -74,7 +69,7 @@ omitted. :: Package () { Package () { "flash-leds", - Package () { ^LED, "led@0", ^LED, "led@1" }, + Package () { "^LED.LED0", "^LED.LED1" }, } } }) diff --git a/Documentation/gpu/xe/index.rst b/Documentation/gpu/xe/index.rst index 92cfb25e64d327..b53a0cc7f66a36 100644 --- a/Documentation/gpu/xe/index.rst +++ b/Documentation/gpu/xe/index.rst @@ -16,6 +16,7 @@ DG2, etc is provided to prototype the driver. 
xe_migrate xe_cs xe_pm + xe_gt_freq xe_pcode xe_gt_mcr xe_wa diff --git a/Documentation/gpu/xe/xe_gt_freq.rst b/Documentation/gpu/xe/xe_gt_freq.rst new file mode 100644 index 00000000000000..c0811200e32755 --- /dev/null +++ b/Documentation/gpu/xe/xe_gt_freq.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================== +Xe GT Frequency Management +========================== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c + :doc: Xe GT Frequency Management + +Internal API +============ + +.. kernel-doc:: drivers/gpu/drm/xe/xe_gt_freq.c + :internal: diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index a7762486c93fcd..f2dcc39044e66d 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -46,6 +46,21 @@ The kernel embeds the building user and host names in `KBUILD_BUILD_USER and KBUILD_BUILD_HOST`_ variables. If you are building from a git commit, you could use its committer address. +Absolute filenames +------------------ + +When the kernel is built out-of-tree, debug information may include +absolute filenames for the source files. This must be overridden by +including the ``-fdebug-prefix-map`` option in the `KCFLAGS`_ variable. + +Depending on the compiler used, the ``__FILE__`` macro may also expand +to an absolute filename in an out-of-tree build. Kbuild automatically +uses the ``-fmacro-prefix-map`` option to prevent this, if it is +supported. + +The Reproducible Builds web site has more information about these +`prefix-map options`_. + Generated files in source packages ---------------------------------- @@ -116,5 +131,7 @@ See ``scripts/setlocalversion`` for details. .. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp .. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host +.. _KCFLAGS: kbuild.html#kcflags +.. 
_prefix-map options: https://reproducible-builds.org/docs/build-path/ .. _Reproducible Builds project: https://reproducible-builds.org/ .. _SOURCE_DATE_EPOCH: https://reproducible-builds.org/docs/source-date-epoch/ diff --git a/Documentation/misc-devices/lis3lv02d.rst b/Documentation/misc-devices/lis3lv02d.rst index 959bd2b822cfa9..6b3b7405ebdf6e 100644 --- a/Documentation/misc-devices/lis3lv02d.rst +++ b/Documentation/misc-devices/lis3lv02d.rst @@ -22,10 +22,10 @@ sporting the feature officially called "HP Mobile Data Protection System 3D" or models (full list can be found in drivers/platform/x86/hp_accel.c) will have their axis automatically oriented on standard way (eg: you can directly play neverball). The accelerometer data is readable via -/sys/devices/platform/lis3lv02d. Reported values are scaled +/sys/devices/faux/lis3lv02d. Reported values are scaled to mg values (1/1000th of earth gravity). -Sysfs attributes under /sys/devices/platform/lis3lv02d/: +Sysfs attributes under /sys/devices/faux/lis3lv02d/: position - 3D position that the accelerometer reports. Format: "(x,y,z)" @@ -85,7 +85,7 @@ the accelerometer are converted into a "standard" organisation of the axes If your laptop model is not recognized (cf "dmesg"), you can send an email to the maintainer to add it to the database. When reporting a new laptop, please include the output of "dmidecode" plus the value of -/sys/devices/platform/lis3lv02d/position in these four cases. +/sys/devices/faux/lis3lv02d/position in these four cases. 
Q&A --- diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 655d8d10fe248d..c650cd3dcb80bc 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -89,8 +89,10 @@ definitions: doc: Group of short_detected states - name: phy-upstream-type - enum-name: + enum-name: phy-upstream + header: linux/ethtool.h type: enum + name-prefix: phy-upstream entries: [ mac, phy ] - name: tcp-data-split diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml index 86ba9ac2a52103..b538bb99ee9b5f 100644 --- a/Documentation/netlink/specs/ovs_vport.yaml +++ b/Documentation/netlink/specs/ovs_vport.yaml @@ -123,12 +123,12 @@ attribute-sets: operations: name-prefix: ovs-vport-cmd- + fixed-header: ovs-header list: - name: new doc: Create a new OVS vport attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -141,7 +141,6 @@ operations: name: del doc: Delete existing OVS vport from a data path attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -152,7 +151,6 @@ operations: name: get doc: Get / dump OVS vport configuration and state attribute-set: vport - fixed-header: ovs-header do: &vport-get-op request: attributes: diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 31238455f8e9d2..2ac0e9fda1582d 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1113,11 +1113,10 @@ attribute-sets: - name: prop-list type: nest - nested-attributes: link-attrs + nested-attributes: prop-list-link-attrs - name: alt-ifname type: string - multi-attr: true - name: perm-address type: binary @@ -1163,6 +1162,13 @@ attribute-sets: - name: netns-immutable type: u8 + - + name: prop-list-link-attrs + subset-of: link-attrs + attributes: + - + name: alt-ifname + multi-attr: true - name: af-spec-attrs attributes: @@ -1585,7 +1591,7 @@ 
attribute-sets: name: nf-call-iptables type: u8 - - name: nf-call-ip6-tables + name: nf-call-ip6tables type: u8 - name: nf-call-arptables @@ -1772,15 +1778,19 @@ attribute-sets: - name: iflags type: u16 + byte-order: big-endian - name: oflags type: u16 + byte-order: big-endian - name: ikey type: u32 + byte-order: big-endian - name: okey type: u32 + byte-order: big-endian - name: local type: binary @@ -1800,10 +1810,11 @@ attribute-sets: type: u8 - name: encap-limit - type: u32 + type: u8 - name: flowinfo type: u32 + byte-order: big-endian - name: flags type: u32 @@ -1816,9 +1827,11 @@ attribute-sets: - name: encap-sport type: u16 + byte-order: big-endian - name: encap-dport type: u16 + byte-order: big-endian - name: collect-metadata type: flag @@ -1840,6 +1853,54 @@ attribute-sets: - name: erspan-hwid type: u16 + - + name: linkinfo-gre6-attrs + subset-of: linkinfo-gre-attrs + attributes: + - + name: link + - + name: iflags + - + name: oflags + - + name: ikey + - + name: okey + - + name: local + display-hint: ipv6 + - + name: remote + display-hint: ipv6 + - + name: ttl + - + name: encap-limit + - + name: flowinfo + - + name: flags + - + name: encap-type + - + name: encap-flags + - + name: encap-sport + - + name: encap-dport + - + name: collect-metadata + - + name: fwmark + - + name: erspan-index + - + name: erspan-ver + - + name: erspan-dir + - + name: erspan-hwid - name: linkinfo-vti-attrs name-prefix: ifla-vti- @@ -1850,9 +1911,11 @@ attribute-sets: - name: ikey type: u32 + byte-order: big-endian - name: okey type: u32 + byte-order: big-endian - name: local type: binary @@ -1902,6 +1965,7 @@ attribute-sets: - name: port type: u16 + byte-order: big-endian - name: collect-metadata type: flag @@ -1921,6 +1985,7 @@ attribute-sets: - name: label type: u32 + byte-order: big-endian - name: ttl-inherit type: u8 @@ -1961,9 +2026,11 @@ attribute-sets: - name: flowinfo type: u32 + byte-order: big-endian - name: flags type: u16 + byte-order: big-endian - name: proto type: u8 
@@ -1993,9 +2060,11 @@ attribute-sets: - name: encap-sport type: u16 + byte-order: big-endian - name: encap-dport type: u16 + byte-order: big-endian - name: collect-metadata type: flag @@ -2077,7 +2146,7 @@ attribute-sets: name: id type: u16 - - name: flag + name: flags type: binary struct: ifla-vlan-flags - @@ -2165,7 +2234,7 @@ attribute-sets: type: binary struct: ifla-cacheinfo - - name: icmp6-stats + name: icmp6stats type: binary struct: ifla-icmp6-stats - @@ -2179,9 +2248,10 @@ attribute-sets: type: u32 - name: mctp-attrs + name-prefix: ifla-mctp- attributes: - - name: mctp-net + name: net type: u32 - name: phys-binding @@ -2292,6 +2362,9 @@ sub-messages: - value: gretap attribute-set: linkinfo-gre-attrs + - + value: ip6gre + attribute-set: linkinfo-gre6-attrs - value: geneve attribute-set: linkinfo-geneve-attrs @@ -2453,7 +2526,6 @@ operations: - min-mtu - max-mtu - prop-list - - alt-ifname - perm-address - proto-down-reason - parent-dev-name diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml index e670b6dc07be4f..a843caa72259e1 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -13,25 +13,25 @@ definitions: type: struct members: - - name: family + name: ndm-family type: u8 - - name: pad + name: ndm-pad type: pad len: 3 - - name: ifindex + name: ndm-ifindex type: s32 - - name: state + name: ndm-state type: u16 enum: nud-state - - name: flags + name: ndm-flags type: u8 enum: ntf-flags - - name: type + name: ndm-type type: u8 enum: rtm-type - @@ -189,7 +189,7 @@ attribute-sets: type: binary display-hint: ipv4 - - name: lladr + name: lladdr type: binary display-hint: mac - diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index aacccea5dfe42a..953aa837958b3f 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -2017,7 +2017,8 @@ attribute-sets: attributes: - name: act - type: nest + 
type: indexed-array + sub-type: nest nested-attributes: tc-act-attrs - name: police @@ -2250,7 +2251,8 @@ attribute-sets: attributes: - name: act - type: nest + type: indexed-array + sub-type: nest nested-attributes: tc-act-attrs - name: police @@ -2745,7 +2747,7 @@ attribute-sets: type: u16 byte-order: big-endian - - name: key-l2-tpv3-sid + name: key-l2tpv3-sid type: u32 byte-order: big-endian - @@ -3504,7 +3506,7 @@ attribute-sets: name: rate64 type: u64 - - name: prate4 + name: prate64 type: u64 - name: burst diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 6c2d8945f59798..eab601ab2db0b7 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -338,10 +338,11 @@ operations directly under the netdev instance lock. Devices drivers are encouraged to rely on the instance lock where possible. For the (mostly software) drivers that need to interact with the core stack, -there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., -``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle -acquiring the instance lock themselves, while the ``netif_xxx`` functions -assume that the driver has already acquired the instance lock. +there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx`` +(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx`` +functions handle acquiring the instance lock themselves, while the +``netif_xxx`` functions assume that the driver has already acquired +the instance lock. 
Notifiers and netdev instance lock ================================== @@ -354,6 +355,7 @@ For devices with locked ops, currently only the following notifiers are running under the lock: * ``NETDEV_REGISTER`` * ``NETDEV_UP`` +* ``NETDEV_CHANGE`` The following notifiers are running without the lock: * ``NETDEV_UNREGISTER`` diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index b8fef8101176dc..7aabead906487e 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -811,11 +811,9 @@ Documentation/devicetree/bindings/ptp/timestamper.txt for more details. 3.2.4 Other caveats for MAC drivers ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Stacked PHCs, especially DSA (but not only) - since that doesn't require any -modification to MAC drivers, so it is more difficult to ensure correctness of -all possible code paths - is that they uncover bugs which were impossible to -trigger before the existence of stacked PTP clocks. One example has to do with -this line of code, already presented earlier:: +The use of stacked PHCs may uncover MAC driver bugs which were impossible to +trigger without them. 
One example has to do with this line of code, already +presented earlier:: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst index 7f24c09f269431..122204da0fff69 100644 --- a/Documentation/networking/xfrm_device.rst +++ b/Documentation/networking/xfrm_device.rst @@ -65,9 +65,13 @@ Callbacks to implement /* from include/linux/netdevice.h */ struct xfrmdev_ops { /* Crypto and Packet offload callbacks */ - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index 12f429359a823e..63344bea839361 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the device in that case. If there is no idle callback, or if the callback returns 0, then the PM core will attempt to carry out a runtime suspend of the device, also respecting devices configured for autosuspend. In essence this means a -call to __pm_runtime_autosuspend() (do note that drivers needs to update the +call to pm_runtime_autosuspend() (do note that drivers needs to update the device last busy mark, pm_runtime_mark_last_busy(), to control the delay under this circumstance). 
To prevent this (for example, if the callback routine has started a delayed suspend), the routine must return a non-zero value. Negative diff --git a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst index cadc580fa23b3b..d728e4db0b857a 100644 --- a/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_CN/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 关于OpenRISC处理器和正在进行中的开发的信息: - ======= ============================= + ======= ============================== 网站 https://openrisc.io - 邮箱 openrisc@lists.librecores.org - ======= ============================= + 邮箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具链和Linux的构建指南 工具链的构建指南可以在openrisc.io或Stafford的工具链构建和发布脚本 中找到。 - ====== ================================================= - 二进制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二进制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具链 https://openrisc.io/software 构建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 构建 diff --git a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst index 422fe9f7a3f23d..a1e4517dc601a1 100644 --- a/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst +++ b/Documentation/translations/zh_TW/arch/openrisc/openrisc_port.rst @@ -17,10 +17,10 @@ OpenRISC 1000系列(或1k)。 關於OpenRISC處理器和正在進行中的開發的信息: - ======= ============================= + ======= ============================== 網站 https://openrisc.io - 郵箱 openrisc@lists.librecores.org - ======= ============================= + 
郵箱 linux-openrisc@vger.kernel.org + ======= ============================== --------------------------------------------------------------------- @@ -36,11 +36,11 @@ OpenRISC工具鏈和Linux的構建指南 工具鏈的構建指南可以在openrisc.io或Stafford的工具鏈構建和發佈腳本 中找到。 - ====== ================================================= - 二進制 https://github.com/openrisc/or1k-gcc/releases + ====== ========================================================== + 二進制 https://github.com/stffrdhrn/or1k-toolchain-build/releases 工具鏈 https://openrisc.io/software 構建 https://github.com/stffrdhrn/or1k-toolchain-build - ====== ================================================= + ====== ========================================================== 2) 構建 diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst index 1dabfc29be0d1f..7195a7f911075c 100644 --- a/Documentation/userspace-api/mseal.rst +++ b/Documentation/userspace-api/mseal.rst @@ -27,7 +27,7 @@ SYSCALL ======= mseal syscall signature ----------------------- - ``int mseal(void \* addr, size_t len, unsigned long flags)`` + ``int mseal(void *addr, size_t len, unsigned long flags)`` **addr**/**len**: virtual memory address range. The address range set by **addr**/**len** must meet: diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1f8625b7646a2f..47c7c3f92314e5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7447,6 +7447,75 @@ Unused bitfields in the bitarrays must be set to zero. This capability connects the vcpu to an in-kernel XIVE device. +6.76 KVM_CAP_HYPERV_SYNIC +------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). 
+ +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. + +6.77 KVM_CAP_HYPERV_SYNIC2 +-------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability enables a newer version of Hyper-V Synthetic interrupt +controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM +doesn't clear SynIC message and event flags pages when they are enabled by +writing to the respective MSRs. + +6.78 KVM_CAP_HYPERV_DIRECT_TLBFLUSH +----------------------------------- + +:Architectures: x86 +:Target: vcpu + +This capability indicates that KVM running on top of Hyper-V hypervisor +enables Direct TLB flush for its guests meaning that TLB flush +hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. +Due to the different ABI for hypercall parameters between Hyper-V and +KVM, enabling this capability effectively disables all hypercall +handling by KVM (as some KVM hypercalls may be mistakenly treated as TLB +flush hypercalls by Hyper-V) so userspace should disable KVM identification +in CPUID and only expose Hyper-V identification. In this case, the guest +thinks it's running on Hyper-V and only uses Hyper-V hypercalls. + +6.79 KVM_CAP_HYPERV_ENFORCE_CPUID +--------------------------------- + +:Architectures: x86 +:Target: vcpu + +When enabled, KVM will disable emulated Hyper-V features provided to the +guest according to the bits in the Hyper-V CPUID feature leaves. Otherwise, all +currently implemented Hyper-V features are provided unconditionally when +Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001) +leaf. 
+ +6.80 KVM_CAP_ENFORCE_PV_FEATURE_CPUID +------------------------------------- + +:Architectures: x86 +:Target: vcpu + +When enabled, KVM will disable paravirtual features provided to the +guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf +(0x40000001). Otherwise, a guest may use the paravirtual features +regardless of what has actually been exposed through the CPUID leaf. + +.. _KVM_CAP_DIRTY_LOG_RING: + + .. _cap_enable_vm: 7. Capabilities that can be enabled on VMs @@ -7927,10 +7996,10 @@ by POWER10 processor. 7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM ------------------------------------- -Architectures: x86 SEV enabled -Type: vm -Parameters: args[0] is the fd of the source vm -Returns: 0 on success; ENOTTY on error +:Architectures: x86 SEV enabled +:Type: vm +:Parameters: args[0] is the fd of the source vm +:Returns: 0 on success; ENOTTY on error This capability enables userspace to copy encryption context from the vm indicated by the fd to the vm this is called on. @@ -7963,24 +8032,6 @@ default. See Documentation/arch/x86/sgx.rst for more details. -7.26 KVM_CAP_PPC_RPT_INVALIDATE -------------------------------- - -:Capability: KVM_CAP_PPC_RPT_INVALIDATE -:Architectures: ppc -:Type: vm - -This capability indicates that the kernel is capable of handling -H_RPT_INVALIDATE hcall. - -In order to enable the use of H_RPT_INVALIDATE in the guest, -user space might have to advertise it for the guest. For example, -IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is -present in the "ibm,hypertas-functions" device-tree property. - -This capability is enabled for hypervisors on platforms like POWER9 -that support radix MMU. - 7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE -------------------------------------- @@ -8038,24 +8089,9 @@ indicated by the fd to the VM this is called on. This is intended to support intra-host migration of VMs between userspace VMMs, upgrading the VMM process without interrupting the guest. 
-7.30 KVM_CAP_PPC_AIL_MODE_3 -------------------------------- - -:Capability: KVM_CAP_PPC_AIL_MODE_3 -:Architectures: ppc -:Type: vm - -This capability indicates that the kernel supports the mode 3 setting for the -"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" -resource that is controlled with the H_SET_MODE hypercall. - -This capability allows a guest kernel to use a better-performance mode for -handling interrupts and system calls. - 7.31 KVM_CAP_DISABLE_QUIRKS2 ---------------------------- -:Capability: KVM_CAP_DISABLE_QUIRKS2 :Parameters: args[0] - set of KVM quirks to disable :Architectures: x86 :Type: vm @@ -8210,27 +8246,6 @@ This capability is aimed to mitigate the threat that malicious VMs can cause CPU stuck (due to event windows don't open up) and make the CPU unavailable to host or other VMs. -7.34 KVM_CAP_MEMORY_FAULT_INFO ------------------------------- - -:Architectures: x86 -:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. - -The presence of this capability indicates that KVM_RUN will fill -kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if -there is a valid memslot but no backing VMA for the corresponding host virtual -address. - -The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns -an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set -to KVM_EXIT_MEMORY_FAULT. - -Note: Userspaces which attempt to resolve memory faults so that they can retry -KVM_RUN are encouraged to guard against repeatedly receiving the same -error/annotated fault. - -See KVM_EXIT_MEMORY_FAULT for more information. - 7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS ----------------------------------- @@ -8248,82 +8263,268 @@ by KVM_CHECK_EXTENSION. Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest. 
-7.36 KVM_CAP_X86_GUEST_MODE ------------------------------- - -:Architectures: x86 -:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. +7.36 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL +---------------------------------------------------------- -The presence of this capability indicates that KVM_RUN will update the -KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the -vCPU was executing nested guest code when it exited. +:Architectures: x86, arm64 +:Type: vm +:Parameters: args[0] - size of the dirty log ring -KVM exits with the register state of either the L1 or L2 guest -depending on which executed at the time of an exit. Userspace must -take care to differentiate between these cases. +KVM is capable of tracking dirty memory using ring buffers that are +mmapped into userspace; there is one dirty ring per vcpu. -7.37 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS -------------------------------------- +The dirty ring is available to userspace as an array of +``struct kvm_dirty_gfn``. Each dirty entry is defined as:: -:Architectures: arm64 -:Target: VM -:Parameters: None -:Returns: 0 on success, -EINVAL if vCPUs have been created before enabling this - capability. + struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; /* as_id | slot_id */ + __u64 offset; + }; -This capability changes the behavior of the registers that identify a PE -implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1. -By default, these registers are visible to userspace but treated as invariant. +The following values are defined for the flags field to define the +current state of the entry:: -When this capability is enabled, KVM allows userspace to change the -aforementioned registers before the first KVM_RUN. These registers are VM -scoped, meaning that the same set of values are presented on all vCPUs in a -given VM. + #define KVM_DIRTY_GFN_F_DIRTY BIT(0) + #define KVM_DIRTY_GFN_F_RESET BIT(1) + #define KVM_DIRTY_GFN_F_MASK 0x3 -8. 
Other capabilities. -====================== +Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM +ioctl to enable this capability for the new guest and set the size of +the rings. Enabling the capability is only allowed before creating any +vCPU, and the size of the ring must be a power of two. The larger the +ring buffer, the less likely the ring is full and the VM is forced to +exit to userspace. The optimal size depends on the workload, but it is +recommended that it be at least 64 KiB (4096 entries). -This section lists capabilities that give information about other -features of the KVM implementation. +Just like for dirty page bitmaps, the buffer tracks writes to +all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was +set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered +with the flag set, userspace can start harvesting dirty pages from the +ring buffer. -8.1 KVM_CAP_PPC_HWRNG ---------------------- +An entry in the ring buffer can be unused (flag bits ``00``), +dirty (flag bits ``01``) or harvested (flag bits ``1X``). The +state machine for the entry is as follows:: -:Architectures: ppc + dirtied harvested reset + 00 -----------> 01 -------------> 1X -------+ + ^ | + | | + +------------------------------------------+ -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel has an implementation of the -H_RANDOM hypercall backed by a hardware random-number generator. -If present, the kernel H_RANDOM handler can be enabled for guest use -with the KVM_CAP_PPC_ENABLE_HCALL capability. +To harvest the dirty pages, userspace accesses the mmapped ring buffer +to read the dirty GFNs. If the flags has the DIRTY bit set (at this stage +the RESET bit must be cleared), then it means this GFN is a dirty GFN. 
+The userspace should harvest this GFN and mark the flags from state +``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set +to show that this GFN is harvested and waiting for a reset), and move +on to the next GFN. The userspace should continue to do this until the +flags of a GFN have the DIRTY bit cleared, meaning that it has harvested +all the dirty GFNs that were available. -8.2 KVM_CAP_HYPERV_SYNIC ------------------------- +Note that on weakly ordered architectures, userspace accesses to the +ring buffer (and more specifically the 'flags' field) must be ordered, +using load-acquire/store-release accessors when available, or any +other memory barrier that will ensure this ordering. -:Architectures: x86 +It's not necessary for userspace to harvest all the dirty GFNs at once. +However, it must collect the dirty GFNs in sequence, i.e., the userspace +program cannot skip one dirty GFN to collect the one next to it. -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel has an implementation of the -Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is -used to support Windows Hyper-V based guest paravirt drivers(VMBus). +After processing one or more entries in the ring buffer, userspace +calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about +it, so that the kernel will reprotect those collected GFNs. +Therefore, the ioctl must be called *before* reading the content of +the dirty pages. -In order to use SynIC, it has to be activated by setting this -capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this -will disable the use of APIC hardware virtualization even if supported -by the CPU, as it's incompatible with SynIC auto-EOI behavior. +The dirty ring can get full. When it happens, the KVM_RUN of the +vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL. 
-8.3 KVM_CAP_PPC_MMU_RADIX -------------------------- +The dirty ring interface has a major difference compared to the +KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from +userspace, it's still possible that the kernel has not yet flushed the +processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the +flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one +needs to kick the vcpu out of KVM_RUN using a signal. The resulting +vmexit ensures that all dirty GFNs are flushed to the dirty rings. -:Architectures: ppc +NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that +should be exposed by weakly ordered architectures, in order to indicate +the additional memory ordering requirements imposed on userspace when +reading the state of an entry and mutating it from DIRTY to HARVESTED. +Architectures with TSO-like ordering (such as x86) are allowed to +expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL +to userspace. -This capability, if KVM_CHECK_EXTENSION indicates that it is -available, means that the kernel can support guests using the -radix MMU defined in Power ISA V3.00 (as implemented in the POWER9 -processor). +After enabling the dirty rings, the userspace needs to detect the +capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the +ring structures can be backed by per-slot bitmaps. With this capability +advertised, it means the architecture can dirty guest pages without +vcpu/ring context, so that some of the dirty information will still be +maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP +can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL +hasn't been enabled, or if any memslot already exists. -8.4 KVM_CAP_PPC_MMU_HASH_V3 ---------------------------- +Note that the bitmap here is only a backup of the ring structure. 
The +use of the ring and bitmap combination is only beneficial if there is +only a very small amount of memory that is dirtied out of vcpu/ring +context. Otherwise, the stand-alone per-slot bitmap mechanism needs to +be considered. + +To collect dirty bits in the backup bitmap, userspace can use the same +KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all +the generation of the dirty bits is done in a single pass. Collecting +the dirty bitmap should be the very last thing that the VMM does before +considering the state as complete. VMM needs to ensure that the dirty +state is final and avoid missing dirty pages from another ioctl ordered +after the bitmap collection. + +NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its +tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on +KVM device "kvm-arm-vgic-its". (2) restore vgic/its tables through +command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device +"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save +vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES} +command on KVM device "kvm-arm-vgic-v3". + +7.37 KVM_CAP_PMU_CAPABILITY +--------------------------- + +:Architectures: x86 +:Type: vm +:Parameters: arg[0] is bitmask of PMU virtualization capabilities. +:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits + +This capability alters PMU virtualization in KVM. + +Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of +PMU virtualization capabilities that can be adjusted on a VM. + +The argument to KVM_ENABLE_CAP is also a bitmask and selects specific +PMU virtualization capabilities to be applied to the VM. This can +only be invoked on a VM prior to the creation of VCPUs. + +At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting +this capability will disable PMU virtualization for that VM. Usermode +should adjust CPUID leaf 0xA to reflect that the PMU is disabled. 
+ +7.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES +------------------------------------- + +:Architectures: x86 +:Type: vm +:Parameters: args[0] must be 0. +:Returns: 0 on success, -EPERM if the userspace process does not + have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been + created. + +This capability disables the NX huge pages mitigation for iTLB MULTIHIT. + +The capability has no effect if the nx_huge_pages module parameter is not set. + +This capability may only be set before any vCPUs are created. + +7.39 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE +--------------------------------------- + +:Architectures: arm64 +:Type: vm +:Parameters: args[0] is the new split chunk size. +:Returns: 0 on success, -EINVAL if any memslot was already created. + +This capability sets the chunk size used in Eager Page Splitting. + +Eager Page Splitting improves the performance of dirty-logging (used +in live migrations) when guest memory is backed by huge-pages. It +avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing +it eagerly when enabling dirty logging (with the +KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using +KVM_CLEAR_DIRTY_LOG. + +The chunk size specifies how many pages to break at a time, using a +single allocation for each chunk. The bigger the chunk size, the more pages +need to be allocated ahead of time. + +The chunk size needs to be a valid block size. The list of acceptable +block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a +64-bit bitmap (each bit describing a block size). The default value is +0, to disable the eager page splitting. + +7.40 KVM_CAP_EXIT_HYPERCALL +--------------------------- + +:Architectures: x86 +:Type: vm + +This capability, if enabled, will cause KVM to exit to userspace +with KVM_EXIT_HYPERCALL exit reason to process some hypercalls. + +Calling KVM_CHECK_EXTENSION for this capability will return a bitmask +of hypercalls that can be configured to exit to userspace. 
+Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE. + +The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset +of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace +the hypercalls whose corresponding bit is in the argument, and return +ENOSYS for the others. + +7.41 KVM_CAP_ARM_SYSTEM_SUSPEND +------------------------------- + +:Architectures: arm64 +:Type: vm + +When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of +type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. + +7.42 KVM_CAP_ARM_WRITABLE_IMP_ID_REGS +------------------------------------- + +:Architectures: arm64 +:Target: VM +:Parameters: None +:Returns: 0 on success, -EINVAL if vCPUs have been created before enabling this + capability. + +This capability changes the behavior of the registers that identify a PE +implementation of the Arm architecture: MIDR_EL1, REVIDR_EL1, and AIDR_EL1. +By default, these registers are visible to userspace but treated as invariant. + +When this capability is enabled, KVM allows userspace to change the +aforementioned registers before the first KVM_RUN. These registers are VM +scoped, meaning that the same set of values is presented on all vCPUs in a +given VM. + +8. Other capabilities. +====================== + +This section lists capabilities that give information about other +features of the KVM implementation. + +8.1 KVM_CAP_PPC_HWRNG +--------------------- + +:Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel has an implementation of the +H_RANDOM hypercall backed by a hardware random-number generator. +If present, the kernel H_RANDOM handler can be enabled for guest use +with the KVM_CAP_PPC_ENABLE_HCALL capability. 
+ +8.3 KVM_CAP_PPC_MMU_RADIX +------------------------- + +:Architectures: ppc + +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that the kernel can support guests using the +radix MMU defined in Power ISA V3.00 (as implemented in the POWER9 +processor). + +8.4 KVM_CAP_PPC_MMU_HASH_V3 +--------------------------- :Architectures: ppc @@ -8362,20 +8563,6 @@ may be incompatible with the MIPS VZ ASE. virtualization, including standard guest virtual memory segments. == ========================================================================== -8.6 KVM_CAP_MIPS_TE -------------------- - -:Architectures: mips - -This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that -it is available, means that the trap & emulate implementation is available to -run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware -assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed -to KVM_CREATE_VM to create a VM which utilises it. - -If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is -available, it means that the VM is using trap & emulate. - 8.7 KVM_CAP_MIPS_64BIT ---------------------- @@ -8457,16 +8644,6 @@ virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N (counting from the right) is set, then a virtual SMT mode of 2^N is available. -8.11 KVM_CAP_HYPERV_SYNIC2 --------------------------- - -:Architectures: x86 - -This capability enables a newer version of Hyper-V Synthetic interrupt -controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM -doesn't clear SynIC message and event flags pages when they are enabled by -writing to the respective MSRs. - 8.12 KVM_CAP_HYPERV_VP_INDEX ---------------------------- @@ -8481,7 +8658,6 @@ capability is absent, userspace can still query this msr's value. 
------------------------------- :Architectures: s390 -:Parameters: none This capability indicates if the flic device will be able to get/set the AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows @@ -8555,21 +8731,6 @@ This capability indicates that KVM supports paravirtualized Hyper-V IPI send hypercalls: HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx. -8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH ------------------------------------ - -:Architectures: x86 - -This capability indicates that KVM running on top of Hyper-V hypervisor -enables Direct TLB flush for its guests meaning that TLB flush -hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM. -Due to the different ABI for hypercall parameters between Hyper-V and -KVM, enabling this capability effectively disables all hypercall -handling by KVM (as some KVM hypercall may be mistakenly treated as TLB -flush hypercalls by Hyper-V) so userspace should disable KVM identification -in CPUID and only exposes Hyper-V identification. In this case, guest -thinks it's running on Hyper-V and only use Hyper-V hypercalls. - 8.22 KVM_CAP_S390_VCPU_RESETS ----------------------------- @@ -8647,142 +8808,6 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. -8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID -------------------------------------- - -Architectures: x86 - -When enabled, KVM will disable paravirtual features provided to the -guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf -(0x40000001). Otherwise, a guest may use the paravirtual features -regardless of what has actually been exposed through the CPUID leaf. - -.. 
_KVM_CAP_DIRTY_LOG_RING: - -8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL ----------------------------------------------------------- - -:Architectures: x86, arm64 -:Parameters: args[0] - size of the dirty log ring - -KVM is capable of tracking dirty memory using ring buffers that are -mmapped into userspace; there is one dirty ring per vcpu. - -The dirty ring is available to userspace as an array of -``struct kvm_dirty_gfn``. Each dirty entry is defined as:: - - struct kvm_dirty_gfn { - __u32 flags; - __u32 slot; /* as_id | slot_id */ - __u64 offset; - }; - -The following values are defined for the flags field to define the -current state of the entry:: - - #define KVM_DIRTY_GFN_F_DIRTY BIT(0) - #define KVM_DIRTY_GFN_F_RESET BIT(1) - #define KVM_DIRTY_GFN_F_MASK 0x3 - -Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM -ioctl to enable this capability for the new guest and set the size of -the rings. Enabling the capability is only allowed before creating any -vCPU, and the size of the ring must be a power of two. The larger the -ring buffer, the less likely the ring is full and the VM is forced to -exit to userspace. The optimal size depends on the workload, but it is -recommended that it be at least 64 KiB (4096 entries). - -Just like for dirty page bitmaps, the buffer tracks writes to -all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was -set in KVM_SET_USER_MEMORY_REGION. Once a memory region is registered -with the flag set, userspace can start harvesting dirty pages from the -ring buffer. - -An entry in the ring buffer can be unused (flag bits ``00``), -dirty (flag bits ``01``) or harvested (flag bits ``1X``). The -state machine for the entry is as follows:: - - dirtied harvested reset - 00 -----------> 01 -------------> 1X -------+ - ^ | - | | - +------------------------------------------+ - -To harvest the dirty pages, userspace accesses the mmapped ring buffer -to read the dirty GFNs. 
If the flags has the DIRTY bit set (at this stage -the RESET bit must be cleared), then it means this GFN is a dirty GFN. -The userspace should harvest this GFN and mark the flags from state -``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set -to show that this GFN is harvested and waiting for a reset), and move -on to the next GFN. The userspace should continue to do this until the -flags of a GFN have the DIRTY bit cleared, meaning that it has harvested -all the dirty GFNs that were available. - -Note that on weakly ordered architectures, userspace accesses to the -ring buffer (and more specifically the 'flags' field) must be ordered, -using load-acquire/store-release accessors when available, or any -other memory barrier that will ensure this ordering. - -It's not necessary for userspace to harvest the all dirty GFNs at once. -However it must collect the dirty GFNs in sequence, i.e., the userspace -program cannot skip one dirty GFN to collect the one next to it. - -After processing one or more entries in the ring buffer, userspace -calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about -it, so that the kernel will reprotect those collected GFNs. -Therefore, the ioctl must be called *before* reading the content of -the dirty pages. - -The dirty ring can get full. When it happens, the KVM_RUN of the -vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL. - -The dirty ring interface has a major difference comparing to the -KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from -userspace, it's still possible that the kernel has not yet flushed the -processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the -flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one -needs to kick the vcpu out of KVM_RUN using a signal. The resulting -vmexit ensures that all dirty GFNs are flushed to the dirty rings. 
- -NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that -should be exposed by weakly ordered architecture, in order to indicate -the additional memory ordering requirements imposed on userspace when -reading the state of an entry and mutating it from DIRTY to HARVESTED. -Architecture with TSO-like ordering (such as x86) are allowed to -expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL -to userspace. - -After enabling the dirty rings, the userspace needs to detect the -capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the -ring structures can be backed by per-slot bitmaps. With this capability -advertised, it means the architecture can dirty guest pages without -vcpu/ring context, so that some of the dirty information will still be -maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP -can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL -hasn't been enabled, or any memslot has been existing. - -Note that the bitmap here is only a backup of the ring structure. The -use of the ring and bitmap combination is only beneficial if there is -only a very small amount of memory that is dirtied out of vcpu/ring -context. Otherwise, the stand-alone per-slot bitmap mechanism needs to -be considered. - -To collect dirty bits in the backup bitmap, userspace can use the same -KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all -the generation of the dirty bits is done in a single pass. Collecting -the dirty bitmap should be the very last thing that the VMM does before -considering the state as complete. VMM needs to ensure that the dirty -state is final and avoid missing dirty pages from another ioctl ordered -after the bitmap collection. - -NOTE: Multiple examples of using the backup bitmap: (1) save vgic/its -tables through command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} on -KVM device "kvm-arm-vgic-its". 
(2) restore vgic/its tables through -command KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_RESTORE_TABLES} on KVM device -"kvm-arm-vgic-its". VGICv3 LPI pending status is restored. (3) save -vgic3 pending table through KVM_DEV_ARM_VGIC_{GRP_CTRL, SAVE_PENDING_TABLES} -command on KVM device "kvm-arm-vgic-v3". - 8.30 KVM_CAP_XEN_HVM -------------------- @@ -8847,10 +8872,9 @@ clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be done when the KVM_CAP_XEN_HVM ioctl sets the KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag. -8.31 KVM_CAP_PPC_MULTITCE -------------------------- +8.31 KVM_CAP_SPAPR_MULTITCE +--------------------------- -:Capability: KVM_CAP_PPC_MULTITCE :Architectures: ppc :Type: vm @@ -8882,72 +8906,9 @@ This capability indicates that the KVM virtual PTP service is supported in the host. A VMM can check whether the service is available to the guest on migration. -8.33 KVM_CAP_HYPERV_ENFORCE_CPUID ---------------------------------- - -Architectures: x86 - -When enabled, KVM will disable emulated Hyper-V features provided to the -guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all -currently implemented Hyper-V features are provided unconditionally when -Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001) -leaf. - -8.34 KVM_CAP_EXIT_HYPERCALL ---------------------------- - -:Capability: KVM_CAP_EXIT_HYPERCALL -:Architectures: x86 -:Type: vm - -This capability, if enabled, will cause KVM to exit to userspace -with KVM_EXIT_HYPERCALL exit reason to process some hypercalls. - -Calling KVM_CHECK_EXTENSION for this capability will return a bitmask -of hypercalls that can be configured to exit to userspace. -Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE. - -The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset -of the result of KVM_CHECK_EXTENSION. KVM will forward to userspace -the hypercalls whose corresponding bit is in the argument, and return -ENOSYS for the others. 
- -8.35 KVM_CAP_PMU_CAPABILITY ---------------------------- - -:Capability: KVM_CAP_PMU_CAPABILITY -:Architectures: x86 -:Type: vm -:Parameters: arg[0] is bitmask of PMU virtualization capabilities. -:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits - -This capability alters PMU virtualization in KVM. - -Calling KVM_CHECK_EXTENSION for this capability returns a bitmask of -PMU virtualization capabilities that can be adjusted on a VM. - -The argument to KVM_ENABLE_CAP is also a bitmask and selects specific -PMU virtualization capabilities to be applied to the VM. This can -only be invoked on a VM prior to the creation of VCPUs. - -At this time, KVM_PMU_CAP_DISABLE is the only capability. Setting -this capability will disable PMU virtualization for that VM. Usermode -should adjust CPUID leaf 0xA to reflect that the PMU is disabled. - -8.36 KVM_CAP_ARM_SYSTEM_SUSPEND -------------------------------- - -:Capability: KVM_CAP_ARM_SYSTEM_SUSPEND -:Architectures: arm64 -:Type: vm - -When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of -type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request. - 8.37 KVM_CAP_S390_PROTECTED_DUMP -------------------------------- -:Capability: KVM_CAP_S390_PROTECTED_DUMP :Architectures: s390 :Type: vm @@ -8957,27 +8918,9 @@ PV guests. The `KVM_PV_DUMP` command is available for the dump related UV data. Also the vcpu ioctl `KVM_S390_PV_CPU_COMMAND` is available and supports the `KVM_PV_DUMP_CPU` subcommand. -8.38 KVM_CAP_VM_DISABLE_NX_HUGE_PAGES -------------------------------------- - -:Capability: KVM_CAP_VM_DISABLE_NX_HUGE_PAGES -:Architectures: x86 -:Type: vm -:Parameters: arg[0] must be 0. -:Returns: 0 on success, -EPERM if the userspace process does not - have CAP_SYS_BOOT, -EINVAL if args[0] is not 0 or any vCPUs have been - created. - -This capability disables the NX huge pages mitigation for iTLB MULTIHIT. - -The capability has no effect if the nx_huge_pages module parameter is not set. 
- -This capability may only be set before any vCPUs are created. - 8.39 KVM_CAP_S390_CPU_TOPOLOGY ------------------------------ -:Capability: KVM_CAP_S390_CPU_TOPOLOGY :Architectures: s390 :Type: vm @@ -8999,37 +8942,9 @@ structure. When getting the Modified Change Topology Report value, the attr->addr must point to a byte where the value will be stored or retrieved from. -8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE ---------------------------------------- - -:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE -:Architectures: arm64 -:Type: vm -:Parameters: arg[0] is the new split chunk size. -:Returns: 0 on success, -EINVAL if any memslot was already created. - -This capability sets the chunk size used in Eager Page Splitting. - -Eager Page Splitting improves the performance of dirty-logging (used -in live migrations) when guest memory is backed by huge-pages. It -avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing -it eagerly when enabling dirty logging (with the -KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using -KVM_CLEAR_DIRTY_LOG. - -The chunk size specifies how many pages to break at a time, using a -single allocation for each chunk. Bigger the chunk size, more pages -need to be allocated ahead of time. - -The chunk size needs to be a valid block size. The list of acceptable -block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a -64-bit bitmap (each bit describing a block size). The default value is -0, to disable the eager page splitting. - 8.41 KVM_CAP_VM_TYPES --------------------- -:Capability: KVM_CAP_MEMORY_ATTRIBUTES :Architectures: x86 :Type: system ioctl @@ -9046,6 +8961,67 @@ Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in production. The behavior and effective ABI for software-protected VMs is unstable. +8.42 KVM_CAP_PPC_RPT_INVALIDATE +------------------------------- + +:Architectures: ppc + +This capability indicates that the kernel is capable of handling +H_RPT_INVALIDATE hcall. 
+ +In order to enable the use of H_RPT_INVALIDATE in the guest, +user space might have to advertise it for the guest. For example, +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is +present in the "ibm,hypertas-functions" device-tree property. + +This capability is enabled for hypervisors on platforms like POWER9 +that support radix MMU. + +8.43 KVM_CAP_PPC_AIL_MODE_3 +--------------------------- + +:Architectures: ppc + +This capability indicates that the kernel supports the mode 3 setting for the +"Address Translation Mode on Interrupt" aka "Alternate Interrupt Location" +resource that is controlled with the H_SET_MODE hypercall. + +This capability allows a guest kernel to use a better-performance mode for +handling interrupts and system calls. + +8.44 KVM_CAP_MEMORY_FAULT_INFO +------------------------------ + +:Architectures: x86 + +The presence of this capability indicates that KVM_RUN will fill +kvm_run.memory_fault if KVM cannot resolve a guest page fault VM-Exit, e.g. if +there is a valid memslot but no backing VMA for the corresponding host virtual +address. + +The information in kvm_run.memory_fault is valid if and only if KVM_RUN returns +an error with errno=EFAULT or errno=EHWPOISON *and* kvm_run.exit_reason is set +to KVM_EXIT_MEMORY_FAULT. + +Note: Userspaces which attempt to resolve memory faults so that they can retry +KVM_RUN are encouraged to guard against repeatedly receiving the same +error/annotated fault. + +See KVM_EXIT_MEMORY_FAULT for more information. + +8.45 KVM_CAP_X86_GUEST_MODE +--------------------------- + +:Architectures: x86 + +The presence of this capability indicates that KVM_RUN will update the +KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the +vCPU was executing nested guest code when it exited. + +KVM exits with the register state of either the L1 or L2 guest +depending on which executed at the time of an exit. Userspace must +take care to differentiate between these cases. + 9. 
Known KVM API problems ========================= @@ -9076,9 +9052,10 @@ the local APIC. The same is true for the ``KVM_FEATURE_PV_UNHALT`` paravirtualized feature. -CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``. -It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel -has enabled in-kernel emulation of the local APIC. +On older versions of Linux, CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by +``KVM_GET_SUPPORTED_CPUID``, but it can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` +is present and the kernel has enabled in-kernel emulation of the local APIC. +On newer versions, ``KVM_GET_SUPPORTED_CPUID`` does report the bit as available. CPU topology ~~~~~~~~~~~~ diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 31a13694289238..73197b31926a57 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -138,6 +138,10 @@ input data, the meaning of which depends on the subfeature being accessed. The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. +.. note:: + The ACPI control method responsible for handling the WMI method calls is not thread-safe. + This is a firmware bug that needs to be handled inside the driver itself. 
+ WMI method Get_EC() ------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 96b82704950184..4feb3b9f8f739d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2519,6 +2519,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/boot/dts/nxp/imx/ F: arch/arm/boot/dts/nxp/mxs/ F: arch/arm64/boot/dts/freescale/ +X: Documentation/devicetree/bindings/media/i2c/ X: arch/arm64/boot/dts/freescale/fsl-* X: arch/arm64/boot/dts/freescale/qoriq-* X: drivers/media/i2c/ @@ -3191,6 +3192,12 @@ M: Dinh Nguyen S: Maintained F: drivers/clk/socfpga/ +ARM/SOCFPGA DWMAC GLUE LAYER +M: Maxime Chevallier +S: Maintained +F: Documentation/devicetree/bindings/net/socfpga-dwmac.txt +F: drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c + ARM/SOCFPGA EDAC BINDINGS M: Matthew Gerlach S: Maintained @@ -3867,8 +3874,9 @@ AUXILIARY BUS DRIVER M: Greg Kroah-Hartman R: Dave Ertman R: Ira Weiny +R: Leon Romanovsky S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/driver-api/auxiliary_bus.rst F: drivers/base/auxiliary.c F: include/linux/auxiliary_bus.h @@ -5782,6 +5790,11 @@ F: scripts/Makefile.clang F: scripts/clang-tools/ K: \b(?i:clang|llvm)\b +CLANG/LLVM THINLTO DISTRIBUTED BUILD +M: Rong Xu +S: Supported +F: scripts/Makefile.vmlinux_thinlink + CLK API M: Russell King L: linux-clk@vger.kernel.org @@ -6335,6 +6348,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan +L: linux-wireless@vger.kernel.org F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h @@ -7020,6 +7034,7 @@ L: rust-for-linux@vger.kernel.org S: Supported W: https://rust-for-linux.com T: git https://github.com/Rust-for-Linux/linux.git alloc-next +F: rust/helpers/dma.c F: rust/kernel/dma.rs F: samples/rust/rust_dma.rs @@ -7225,7 +7240,7 @@ M: Greg Kroah-Hartman M: "Rafael J. 
Wysocki" M: Danilo Krummrich S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: Documentation/core-api/kobject.rst F: drivers/base/ F: fs/debugfs/ @@ -8717,6 +8732,7 @@ M: Chao Yu R: Yue Hu R: Jeffle Xu R: Sandeep Dhavale +R: Hongbo Li L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org @@ -10136,6 +10152,13 @@ F: drivers/gpio/gpio-regmap.c F: include/linux/gpio/regmap.h K: (devm_)?gpio_regmap_(un)?register +GPIO SLOPPY LOGIC ANALYZER +M: Wolfram Sang +S: Supported +F: Documentation/dev-tools/gpio-sloppy-logic-analyzer.rst +F: drivers/gpio/gpio-sloppy-logic-analyzer.c +F: tools/gpio/gpio-sloppy-logic-analyzer.sh + GPIO SUBSYSTEM M: Linus Walleij M: Bartosz Golaszewski @@ -10151,6 +10174,8 @@ F: include/linux/gpio.h F: include/linux/gpio/ F: include/linux/of_gpio.h K: (devm_)?gpio_(request|free|direction|get|set) +K: GPIOD_FLAGS_BIT_NONEXCLUSIVE +K: devm_gpiod_unhinge GPIO UAPI M: Bartosz Golaszewski @@ -10453,14 +10478,20 @@ S: Supported F: drivers/infiniband/hw/hfi1 HFS FILESYSTEM +M: Viacheslav Dubeyko +M: John Paul Adrian Glaubitz +M: Yangtao Li L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfs.rst F: fs/hfs/ HFSPLUS FILESYSTEM +M: Viacheslav Dubeyko +M: John Paul Adrian Glaubitz +M: Yangtao Li L: linux-fsdevel@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/filesystems/hfsplus.rst F: fs/hfsplus/ @@ -10954,6 +10985,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song +R: Oscar Salvador L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -11079,6 +11111,14 @@ L: linuxppc-dev@lists.ozlabs.org S: Odd Fixes F: drivers/tty/hvc/ +HUNG TASK DETECTOR +M: Andrew Morton +R: Lance Yang +L: linux-kernel@vger.kernel.org +S: Maintained +F: include/linux/hung_task.h +F: kernel/hung_task.c + I2C ACPI 
SUPPORT M: Mika Westerberg L: linux-i2c@vger.kernel.org @@ -11217,7 +11257,6 @@ S: Maintained F: drivers/i2c/busses/i2c-cht-wc.c I2C/SMBUS ISMT DRIVER -M: Seth Heasley M: Neil Horman L: linux-i2c@vger.kernel.org F: Documentation/i2c/busses/i2c-ismt.rst @@ -12810,6 +12849,7 @@ F: lib/Kconfig.kcsan F: scripts/Makefile.kcsan KDUMP +M: Andrew Morton M: Baoquan He R: Vivek Goyal R: Dave Young @@ -13106,11 +13146,13 @@ KERNFS M: Greg Kroah-Hartman M: Tejun Heo S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git F: fs/kernfs/ F: include/linux/kernfs.h KEXEC +M: Andrew Morton +M: Baoquan He L: kexec@lists.infradead.org W: http://kernel.org/pub/linux/utils/kernel/kexec/ F: include/linux/kexec.h @@ -14283,6 +14325,7 @@ S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER +L: linux-wireless@vger.kernel.org L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ @@ -15049,7 +15092,7 @@ F: Documentation/devicetree/bindings/media/mediatek-jpeg-*.yaml F: drivers/media/platform/mediatek/jpeg/ MEDIATEK KEYPAD DRIVER -M: Mattijs Korpershoek +M: Mattijs Korpershoek S: Supported F: Documentation/devicetree/bindings/input/mediatek,mt6779-keypad.yaml F: drivers/input/keyboard/mt6779-keypad.c @@ -15472,24 +15515,45 @@ F: Documentation/mm/ F: include/linux/gfp.h F: include/linux/gfp_types.h F: include/linux/memfd.h -F: include/linux/memory.h F: include/linux/memory_hotplug.h F: include/linux/memory-tiers.h F: include/linux/mempolicy.h F: include/linux/mempool.h F: include/linux/memremap.h -F: include/linux/mm.h -F: include/linux/mm_*.h F: include/linux/mmzone.h F: include/linux/mmu_notifier.h F: include/linux/pagewalk.h -F: include/linux/rmap.h F: include/trace/events/ksm.h F: mm/ F: tools/mm/ F: tools/testing/selftests/mm/ N: include/linux/page[-_]* +MEMORY MANAGEMENT - CORE +M: Andrew Morton +M: David 
Hildenbrand +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Vlastimil Babka +R: Mike Rapoport +R: Suren Baghdasaryan +R: Michal Hocko +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/memory.h +F: include/linux/mm.h +F: include/linux/mm_*.h +F: include/linux/mmdebug.h +F: include/linux/pagewalk.h +F: mm/Kconfig +F: mm/debug.c +F: mm/init-mm.c +F: mm/memory.c +F: mm/pagewalk.c +F: mm/util.c + MEMORY MANAGEMENT - EXECMEM M: Andrew Morton M: Mike Rapoport @@ -15498,6 +15562,53 @@ S: Maintained F: include/linux/execmem.h F: mm/execmem.c +MEMORY MANAGEMENT - GUP (GET USER PAGES) +M: Andrew Morton +M: David Hildenbrand +R: Jason Gunthorpe +R: John Hubbard +R: Peter Xu +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: mm/gup.c + +MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) +M: Andrew Morton +M: David Hildenbrand +R: Xu Xin +R: Chengming Zhou +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/ksm.rst +F: Documentation/mm/ksm.rst +F: include/linux/ksm.h +F: include/trace/events/ksm.h +F: mm/ksm.c + +MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION +M: Andrew Morton +M: David Hildenbrand +R: Zi Yan +R: Matthew Brost +R: Joshua Hahn +R: Rakie Kim +R: Byungchul Park +R: Gregory Price +R: Ying Huang +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/linux/mempolicy.h +F: include/linux/migrate.h +F: mm/mempolicy.c +F: mm/migrate.c +F: mm/migrate_device.c + MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION M: Andrew Morton M: Mike Rapoport @@ -15508,6 +15619,49 @@ F: mm/numa.c F: mm/numa_emulation.c F: mm/numa_memblks.c +MEMORY MANAGEMENT - PAGE ALLOCATOR +M: Andrew Morton +M: Vlastimil Babka +R: Suren 
Baghdasaryan +R: Michal Hocko +R: Brendan Jackman +R: Johannes Weiner +R: Zi Yan +L: linux-mm@kvack.org +S: Maintained +F: include/linux/compaction.h +F: include/linux/gfp.h +F: include/linux/page-isolation.h +F: mm/compaction.c +F: mm/page_alloc.c +F: mm/page_isolation.c + +MEMORY MANAGEMENT - RECLAIM +M: Andrew Morton +M: Johannes Weiner +R: David Hildenbrand +R: Michal Hocko +R: Qi Zheng +R: Shakeel Butt +R: Lorenzo Stoakes +L: linux-mm@kvack.org +S: Maintained +F: mm/pt_reclaim.c +F: mm/vmscan.c + +MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) +M: Andrew Morton +M: David Hildenbrand +M: Lorenzo Stoakes +R: Rik van Riel +R: Liam R. Howlett +R: Vlastimil Babka +R: Harry Yoo +L: linux-mm@kvack.org +S: Maintained +F: include/linux/rmap.h +F: mm/rmap.c + MEMORY MANAGEMENT - SECRETMEM M: Andrew Morton M: Mike Rapoport @@ -15516,6 +15670,30 @@ S: Maintained F: include/linux/secretmem.h F: mm/secretmem.c +MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) +M: Andrew Morton +M: David Hildenbrand +R: Zi Yan +R: Baolin Wang +R: Lorenzo Stoakes +R: Liam R. Howlett +R: Nico Pache +R: Ryan Roberts +R: Dev Jain +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/admin-guide/mm/transhuge.rst +F: include/linux/huge_mm.h +F: include/linux/khugepaged.h +F: include/trace/events/huge_memory.h +F: mm/huge_memory.c +F: mm/khugepaged.c +F: tools/testing/selftests/mm/khugepaged.c +F: tools/testing/selftests/mm/split_huge_page_test.c +F: tools/testing/selftests/mm/transhuge-stress.c + MEMORY MANAGEMENT - USERFAULTFD M: Andrew Morton R: Peter Xu @@ -15535,10 +15713,12 @@ M: Liam R. 
Howlett M: Lorenzo Stoakes R: Vlastimil Babka R: Jann Horn +R: Pedro Falcato L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/trace/events/mmap.h F: mm/mlock.c F: mm/mmap.c F: mm/mprotect.c @@ -15549,6 +15729,36 @@ F: mm/vma.h F: mm/vma_internal.h F: tools/testing/vma/ +MEMORY MAPPING - LOCKING +M: Andrew Morton +M: Suren Baghdasaryan +M: Liam R. Howlett +M: Lorenzo Stoakes +R: Vlastimil Babka +R: Shakeel Butt +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: Documentation/mm/process_addrs.rst +F: include/linux/mmap_lock.h +F: include/trace/events/mmap_lock.h +F: mm/mmap_lock.c + +MEMORY MAPPING - MADVISE (MEMORY ADVICE) +M: Andrew Morton +M: Liam R. Howlett +M: Lorenzo Stoakes +M: David Hildenbrand +R: Vlastimil Babka +R: Jann Horn +L: linux-mm@kvack.org +S: Maintained +W: http://www.linux-mm.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm +F: include/uapi/asm-generic/mman-common.h +F: mm/madvise.c + MEMORY TECHNOLOGY DEVICES (MTD) M: Miquel Raynal M: Richard Weinberger @@ -16756,6 +16966,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: drivers/ptp/ +F: drivers/s390/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h @@ -16765,6 +16976,7 @@ F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h +F: include/linux/ism.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h @@ -17368,7 +17580,7 @@ T: git git://git.infradead.org/nvme.git F: drivers/nvme/target/ NVMEM FRAMEWORK -M: Srinivas Kandagatla +M: Srinivas Kandagatla S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/srini/nvmem.git F: Documentation/ABI/stable/sysfs-bus-nvmem @@ -18302,7 +18514,7 @@ F: 
include/uapi/linux/ppdev.h PARAVIRT_OPS INTERFACE M: Juergen Gross R: Ajay Kaher -R: Alexey Makhalov +R: Alexey Makhalov R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org @@ -18633,7 +18845,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi M: Krzysztof Wilczyński -R: Manivannan Sadhasivam +M: Manivannan Sadhasivam R: Rob Herring L: linux-pci@vger.kernel.org S: Supported @@ -18686,6 +18898,16 @@ F: include/asm-generic/pci* F: include/linux/of_pci.h F: include/linux/pci* F: include/uapi/linux/pci* + +PCI SUBSYSTEM [RUST] +M: Danilo Krummrich +R: Bjorn Helgaas +R: Krzysztof Wilczyński +L: linux-pci@vger.kernel.org +S: Maintained +C: irc://irc.oftc.net/linux-pci +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git +F: rust/helpers/pci.c F: rust/kernel/pci.rs F: samples/rust/rust_driver_pci.rs @@ -19573,7 +19795,7 @@ S: Supported F: drivers/crypto/intel/qat/ QCOM AUDIO (ASoC) DRIVERS -M: Srinivas Kandagatla +M: Srinivas Kandagatla L: linux-sound@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Supported @@ -19746,6 +19968,7 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k @@ -19755,6 +19978,7 @@ N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -19764,6 +19988,7 @@ N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k @@ -19873,7 +20098,7 @@ F: Documentation/devicetree/bindings/net/qcom,ethqos.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c QUALCOMM FASTRPC DRIVER 
-M: Srinivas Kandagatla +M: Srinivas Kandagatla M: Amol Maheshwari L: linux-arm-msm@vger.kernel.org L: dri-devel@lists.freedesktop.org @@ -20451,8 +20676,8 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET AVB DRIVER -M: Paul Barker M: Niklas Söderlund +R: Paul Barker L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org S: Maintained @@ -21253,6 +21478,7 @@ L: linux-s390@vger.kernel.org L: netdev@vger.kernel.org S: Supported F: drivers/s390/net/ +F: include/linux/ism.h S390 PCI SUBSYSTEM M: Niklas Schnelle @@ -21921,7 +22147,7 @@ S: Maintained F: drivers/media/rc/serial_ir.c SERIAL LOW-POWER INTER-CHIP MEDIA BUS (SLIMbus) -M: Srinivas Kandagatla +M: Srinivas Kandagatla L: linux-sound@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/slimbus/ @@ -22137,6 +22363,7 @@ F: drivers/platform/x86/touchscreen_dmi.c SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller +L: linux-wireless@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml F: drivers/net/wireless/silabs/ @@ -22237,9 +22464,7 @@ F: drivers/nvmem/layouts/sl28vpd.c SLAB ALLOCATOR M: Christoph Lameter -M: Pekka Enberg M: David Rientjes -M: Joonsoo Kim M: Andrew Morton M: Vlastimil Babka R: Roman Gushchin @@ -22654,9 +22879,15 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git F: Documentation/devicetree/bindings/sound/ F: Documentation/sound/soc/ F: include/dt-bindings/sound/ +F: include/sound/cs-amp-lib.h +F: include/sound/cs35l* +F: include/sound/cs4271.h +F: include/sound/cs42l* +F: include/sound/madera-pdata.h F: include/sound/soc* F: include/sound/sof.h F: include/sound/sof/ +F: include/sound/wm*.h F: include/trace/events/sof*.h F: include/uapi/sound/asoc.h F: sound/soc/ @@ -22767,7 +22998,6 @@ F: drivers/accessibility/speakup/ SPEAR PLATFORM/CLOCK/PINCTRL SUPPORT M: Viresh Kumar -M: Shiraz Hashim L: linux-arm-kernel@lists.infradead.org 
(moderated for non-subscribers) L: soc@lists.linux.dev S: Maintained @@ -25126,9 +25356,13 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS +M: Badhri Jagan Sridharan L: linux-usb@vger.kernel.org -S: Orphan -F: drivers/usb/typec/tcpm/ +S: Maintained +F: drivers/usb/typec/tcpm/tcpci.c +F: drivers/usb/typec/tcpm/tcpm.c +F: include/linux/usb/tcpci.h +F: include/linux/usb/tcpm.h USB TYPEC TUSB1046 MUX DRIVER M: Romain Gantois @@ -25741,7 +25975,7 @@ F: tools/testing/vsock/ VMALLOC M: Andrew Morton -R: Uladzislau Rezki +M: Uladzislau Rezki L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org @@ -25765,7 +25999,7 @@ F: drivers/misc/vmw_balloon.c VMWARE HYPERVISOR INTERFACE M: Ajay Kaher -M: Alexey Makhalov +M: Alexey Makhalov R: Broadcom internal kernel review list L: virtualization@lists.linux.dev L: x86@kernel.org @@ -25793,7 +26027,7 @@ F: drivers/scsi/vmw_pvscsi.h VMWARE VIRTUAL PTP CLOCK DRIVER M: Nick Shi R: Ajay Kaher -R: Alexey Makhalov +R: Alexey Makhalov R: Broadcom internal kernel review list L: netdev@vger.kernel.org S: Supported diff --git a/Makefile b/Makefile index 38689a0c36052b..1c77bc7755b23b 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 15 -SUBLEVEL = 0 -EXTRAVERSION = -rc1 +SUBLEVEL = 3 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -298,7 +298,8 @@ no-dot-config-targets := $(clean-targets) \ outputmakefile rustavailable rustfmt rustfmtcheck no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \ image_name -single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.o_thinlto_native \ + %.a_thinlto_native %.o.thinlto.bc %/ config-build := mixed-build := @@ -477,7 +478,6 @@ export rust_common_flags := --edition=2021 \ -Wclippy::ignored_unit_patterns \ -Wclippy::mut_mut \ -Wclippy::needless_bitwise_bool \ - 
-Wclippy::needless_continue \ -Aclippy::needless_lifetimes \ -Wclippy::no_mangle_with_rust_abi \ -Wclippy::undocumented_unsafe_blocks \ @@ -858,11 +858,19 @@ KBUILD_CFLAGS += -fno-delete-null-pointer-checks ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE KBUILD_CFLAGS += -O2 KBUILD_RUSTFLAGS += -Copt-level=2 +else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 +KBUILD_CFLAGS += -O3 +KBUILD_RUSTFLAGS += -Copt-level=3 else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os KBUILD_RUSTFLAGS += -Copt-level=s endif +# Perform swing modulo scheduling immediately before the first scheduling pass. +# This pass looks at innermost loops and reorders their instructions by +# overlapping different iterations. +KBUILD_CFLAGS += $(call cc-option,-fmodulo-sched -fmodulo-sched-allow-regmoves -fivopts -fmodulo-sched) + # Always set `debug-assertions` and `overflow-checks` because their default # depends on `opt-level` and `debug-assertions`, respectively. KBUILD_RUSTFLAGS += -Cdebug-assertions=$(if $(CONFIG_RUST_DEBUG_ASSERTIONS),y,n) @@ -992,10 +1000,10 @@ export CC_FLAGS_SCS endif ifdef CONFIG_LTO_CLANG -ifdef CONFIG_LTO_CLANG_THIN -CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit -else +ifdef CONFIG_LTO_CLANG_FULL CC_FLAGS_LTO := -flto +else # for CONFIG_LTO_CLANG_THIN or CONFIG_LTO_CLANG_THIN_DIST +CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit endif CC_FLAGS_LTO += -fvisibility=hidden @@ -1053,10 +1061,6 @@ NOSTDINC_FLAGS += -nostdinc # perform bounds checking. KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) -#Currently, disable -Wstringop-overflow for GCC 11, globally. 
-KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow @@ -1068,10 +1072,12 @@ ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -fconserve-stack endif +# Ensure compilers do not transform certain loops into calls to wcslen() +KBUILD_CFLAGS += -fno-builtin-wcslen + # change __FILE__ to the relative path to the source directory ifdef building_out_of_srctree -KBUILD_CPPFLAGS += $(call cc-option,-ffile-prefix-map=$(srcroot)/=) -KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/= +KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=) endif # include additional Makefiles when needed @@ -1216,8 +1222,34 @@ vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt FORCE $(call if_changed,ar_vmlinux.a) PHONY += vmlinux_o +ifdef CONFIG_LTO_CLANG_THIN_DIST +vmlinux.thinlink: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_thinlink +targets += vmlinux.thinlink + +vmlinux.a_thinlto_native := $(patsubst %.a,%.a_thinlto_native,$(KBUILD_VMLINUX_OBJS)) +quiet_cmd_ar_vmlinux.a_thinlto_native = AR $@ + cmd_ar_vmlinux.a_thinlto_native = \ + rm -f $@; \ + $(AR) cDPrST $@ $(vmlinux.a_thinlto_native); \ + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + +define rule_gen_vmlinux.a_thinlto_native + +$(Q)$(MAKE) $(build)=. 
need-builtin=1 thinlto_final_pass=1 need-modorder=1 built-in.a_thinlto_native + $(call cmd_and_savecmd,ar_vmlinux.a_thinlto_native) +endef + +vmlinux.a_thinlto_native: vmlinux.thinlink scripts/head-object-list.txt FORCE + $(call if_changed_rule,gen_vmlinux.a_thinlto_native) + +targets += vmlinux.a_thinlto_native + +vmlinux_o: vmlinux.a_thinlto_native + $(Q)$(MAKE) thinlto_final_pass=1 -f $(srctree)/scripts/Makefile.vmlinux_o +else vmlinux_o: vmlinux.a $(KBUILD_VMLINUX_LIBS) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.vmlinux_o +endif vmlinux.o modules.builtin.modinfo modules.builtin: vmlinux_o @: @@ -1575,7 +1607,8 @@ CLEAN_FILES += vmlinux.symvers modules-only.symvers \ modules.builtin.ranges vmlinux.o.map vmlinux.unstripped \ compile_commands.json rust/test \ rust-project.json .vmlinux.objs .vmlinux.export.c \ - .builtin-dtbs-list .builtin-dtb.S + .builtin-dtbs-list .builtin-dtb.S \ + .vmlinux_thinlto_bc_files vmlinux.thinlink # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ @@ -2026,6 +2059,8 @@ clean: $(clean-dirs) -o -name '*.symtypes' -o -name 'modules.order' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ + -o -name '*.a_thinlto_native' -o -name '*.o_thinlto_native' \ + -o -name '*.o.thinlto.bc' \ -o -name '*.gcno' \ \) -type f -print \ -o -name '.tmp_*' -print \ diff --git a/arch/Kconfig b/arch/Kconfig index b0adb665041f17..30dccda07c6716 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -810,6 +810,25 @@ config LTO_CLANG_THIN https://clang.llvm.org/docs/ThinLTO.html If unsure, say Y. + +config LTO_CLANG_THIN_DIST + bool "Clang ThinLTO in distributed mode (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO in distributed build mode. + In this mode, the linker performs the thin-link, generating + ThinLTO index files. 
Subsequently, the build system explicitly + invokes ThinLTO backend compilation using these index files + and pre-linked IR objects. The resulting native object files + are with the .o_thinlto_native suffix. + + This build mode offers improved visibility into the ThinLTO + process through explicit subcommand exposure. It also makes + final native object files directly available, benefiting + tools like objtool and kpatch. Additionally, it provides + crucial granular control over back-end options, enabling + module-specific compiler options, and simplifies debugging. endchoice config ARCH_SUPPORTS_AUTOFDO_CLANG diff --git a/arch/arm/boot/dts/amlogic/meson8.dtsi b/arch/arm/boot/dts/amlogic/meson8.dtsi index 847f7b1f1e9617..f785e0de0847b5 100644 --- a/arch/arm/boot/dts/amlogic/meson8.dtsi +++ b/arch/arm/boot/dts/amlogic/meson8.dtsi @@ -451,7 +451,7 @@ pwm_ef: pwm@86c0 { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; reg = <0x86c0 0x10>; @@ -705,7 +705,7 @@ &pwm_ab { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -713,7 +713,7 @@ &pwm_cd { compatible = "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm/boot/dts/amlogic/meson8b.dtsi b/arch/arm/boot/dts/amlogic/meson8b.dtsi index 0876611ce26a8c..fdb0abe23a0c8b 100644 --- a/arch/arm/boot/dts/amlogic/meson8b.dtsi +++ b/arch/arm/boot/dts/amlogic/meson8b.dtsi @@ -406,7 +406,7 @@ compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; reg = 
<0x86c0 0x10>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -680,7 +680,7 @@ &pwm_ab { compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -688,7 +688,7 @@ &pwm_cd { compatible = "amlogic,meson8b-pwm-v2", "amlogic,meson8-pwm-v2"; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "Video PLL" */ + <0>, /* unknown/untested, the datasheet calls it "Video PLL" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm/boot/dts/microchip/at91sam9263ek.dts b/arch/arm/boot/dts/microchip/at91sam9263ek.dts index 471ea25296aa14..93c5268a0845d0 100644 --- a/arch/arm/boot/dts/microchip/at91sam9263ek.dts +++ b/arch/arm/boot/dts/microchip/at91sam9263ek.dts @@ -152,7 +152,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/microchip/tny_a9263.dts b/arch/arm/boot/dts/microchip/tny_a9263.dts index 3dd48b3e06da57..fd8244b56e0593 100644 --- a/arch/arm/boot/dts/microchip/tny_a9263.dts +++ b/arch/arm/boot/dts/microchip/tny_a9263.dts @@ -64,7 +64,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/microchip/usb_a9263.dts b/arch/arm/boot/dts/microchip/usb_a9263.dts index 60d7936dc56274..8e1a3fb61087ca 100644 --- a/arch/arm/boot/dts/microchip/usb_a9263.dts +++ 
b/arch/arm/boot/dts/microchip/usb_a9263.dts @@ -58,7 +58,7 @@ }; spi0: spi@fffa4000 { - cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioA 5 GPIO_ACTIVE_LOW>; status = "okay"; flash@0 { compatible = "atmel,at45", "atmel,dataflash"; @@ -84,7 +84,7 @@ nand@3 { reg = <0x3 0x0 0x800000>; rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>; - cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>; nand-bus-width = <8>; nand-ecc-mode = "soft"; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi index f2386dcb9ff2c0..dda4fa91b2f2cc 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-imx6ull-opos6ul.dtsi @@ -40,6 +40,9 @@ reg = <1>; interrupt-parent = <&gpio4>; interrupts = <16 IRQ_TYPE_LEVEL_LOW>; + micrel,led-mode = <1>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; status = "okay"; }; }; diff --git a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi index 5f1a6b4b764492..1dad4e4493926f 100644 --- a/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-apq8064.dtsi @@ -213,12 +213,6 @@ }; }; - sfpb_mutex: hwmutex { - compatible = "qcom,sfpb-mutex"; - syscon = <&sfpb_wrapper_mutex 0x604 0x4>; - #hwlock-cells = <1>; - }; - smem { compatible = "qcom,smem"; memory-region = <&smem_region>; @@ -284,6 +278,40 @@ }; }; + replicator { + compatible = "arm,coresight-static-replicator"; + + clocks = <&rpmcc RPM_QDSS_CLK>; + clock-names = "apb_pclk"; + + in-ports { + port { + replicator_in: endpoint { + remote-endpoint = <&funnel_out>; + }; + }; + }; + + out-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + replicator_out0: endpoint { + remote-endpoint = <&etb_in>; + }; + }; + + port@1 { + reg = <1>; + replicator_out1: endpoint { + remote-endpoint = <&tpiu_in>; + }; + }; + }; + }; + soc: soc { #address-cells = <1>; 
#size-cells = <1>; @@ -305,9 +333,10 @@ pinctrl-0 = <&ps_hold_default_state>; }; - sfpb_wrapper_mutex: syscon@1200000 { - compatible = "syscon"; - reg = <0x01200000 0x8000>; + sfpb_mutex: hwmutex@1200600 { + compatible = "qcom,sfpb-mutex"; + reg = <0x01200600 0x100>; + #hwlock-cells = <1>; }; intc: interrupt-controller@2000000 { @@ -326,6 +355,8 @@ ; reg = <0x0200a000 0x100>; clock-frequency = <27000000>; + clocks = <&sleep_clk>; + clock-names = "sleep"; cpu-offset = <0x80000>; }; @@ -1532,39 +1563,6 @@ }; }; - replicator { - compatible = "arm,coresight-static-replicator"; - - clocks = <&rpmcc RPM_QDSS_CLK>; - clock-names = "apb_pclk"; - - out-ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - replicator_out0: endpoint { - remote-endpoint = <&etb_in>; - }; - }; - port@1 { - reg = <1>; - replicator_out1: endpoint { - remote-endpoint = <&tpiu_in>; - }; - }; - }; - - in-ports { - port { - replicator_in: endpoint { - remote-endpoint = <&funnel_out>; - }; - }; - }; - }; - funnel@1a04000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x1a04000 0x1000>; diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig index f2596a1b2f7d9a..ff13e1ecf4bb95 100644 --- a/arch/arm/configs/at91_dt_defconfig +++ b/arch/arm/configs/at91_dt_defconfig @@ -232,7 +232,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_DEV_ATMEL_AES=y CONFIG_CRYPTO_DEV_ATMEL_TDES=y CONFIG_CRYPTO_DEV_ATMEL_SHA=y -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_ACORN_8x8=y diff --git a/arch/arm/configs/collie_defconfig b/arch/arm/configs/collie_defconfig index 42cb1c8541188c..578c6a4af620d7 100644 --- a/arch/arm/configs/collie_defconfig +++ b/arch/arm/configs/collie_defconfig @@ -78,7 +78,6 @@ CONFIG_ROMFS_FS=y CONFIG_NLS_DEFAULT="cp437" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_MINI_4x6=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git 
a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 3474e475373ae8..70b8c78386f405 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -249,7 +249,6 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=m CONFIG_DMA_CMA=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_RT_MUTEXES=y diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index b382a2e175fbc6..d76eb12d29a759 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -128,7 +128,6 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 7ad48fdda1dac6..e81a5d6c1c2085 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -370,7 +370,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_DEV_EXYNOS_RNG=y CONFIG_CRYPTO_DEV_S5P=y -CONFIG_CRC_CCITT=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=96 CONFIG_FONTS=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 297c6a7b978a69..062c1eb8dd6012 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -481,8 +481,6 @@ CONFIG_SECURITYFS=y CONFIG_CRYPTO_DEV_FSL_CAAM=y CONFIG_CRYPTO_DEV_SAHARA=y CONFIG_CRYPTO_DEV_MXS_DCP=y -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index 2aa2ac8c6507d8..2d489186e945f3 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -147,7 +147,6 @@ CONFIG_EXT2_FS=y # CONFIG_INOTIFY_USER is 
not set CONFIG_JFFS2_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_CRC_ITU_T=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig index 98e267213b214f..9afccd76446b6b 100644 --- a/arch/arm/configs/lpc32xx_defconfig +++ b/arch/arm/configs/lpc32xx_defconfig @@ -179,7 +179,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_ANSI_CPRNG=y # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig index acd16204f8d7f8..275ddf7a3a14df 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -108,8 +108,6 @@ CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_CHACHA20_NEON=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=m -CONFIG_CRC_ITU_T=m CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_PRINTK_TIME=y diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig index f6f9e135353ed4..842a989baa277d 100644 --- a/arch/arm/configs/mmp2_defconfig +++ b/arch/arm/configs/mmp2_defconfig @@ -67,7 +67,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig index 27d650635d9b83..1a86dc30552361 100644 --- a/arch/arm/configs/multi_v4t_defconfig +++ b/arch/arm/configs/multi_v4t_defconfig @@ -91,6 +91,5 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_CRAMFS=y CONFIG_MINIX_FS=y -CONFIG_CRC_CCITT=y # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index db81862bdb9374..cf6180b4296e98 100644 --- 
a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -289,7 +289,6 @@ CONFIG_NLS_UTF8=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig index a518d4a2581e08..23dbb80fcc2eec 100644 --- a/arch/arm/configs/mvebu_v5_defconfig +++ b/arch/arm/configs/mvebu_v5_defconfig @@ -187,7 +187,6 @@ CONFIG_NLS_UTF8=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index d8a6e43c401ef2..c76d66135abb19 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -160,7 +160,6 @@ CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_CRYPTO_DEV_MXS_DCP=y -CONFIG_CRC_ITU_T=m CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 113d6dfe52435e..75b326bc7830ce 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -706,9 +706,6 @@ CONFIG_CRYPTO_DEV_OMAP=m CONFIG_CRYPTO_DEV_OMAP_SHAM=m CONFIG_CRYPTO_DEV_OMAP_AES=m CONFIG_CRYPTO_DEV_OMAP_DES=m -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_FONTS=y CONFIG_FONT_8x8=y diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 0629b088a584a5..62b9c61027898f 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -136,7 +136,6 @@ CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_DEV_MARVELL_CESA=y -CONFIG_CRC_T10DIF=y # CONFIG_DEBUG_BUGVERBOSE is not set 
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/pxa168_defconfig b/arch/arm/configs/pxa168_defconfig index ce10fe2104bf1e..4748c7d33cb8a9 100644 --- a/arch/arm/configs/pxa168_defconfig +++ b/arch/arm/configs/pxa168_defconfig @@ -41,7 +41,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/pxa910_defconfig b/arch/arm/configs/pxa910_defconfig index 1f28aea860146f..49b59c600ae1c2 100644 --- a/arch/arm/configs/pxa910_defconfig +++ b/arch/arm/configs/pxa910_defconfig @@ -50,7 +50,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig index de0ac8f521d761..24fca8608554cd 100644 --- a/arch/arm/configs/pxa_defconfig +++ b/arch/arm/configs/pxa_defconfig @@ -663,8 +663,6 @@ CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=m CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/arm/configs/s5pv210_defconfig b/arch/arm/configs/s5pv210_defconfig index 5dbe85c263de3c..02121eec365819 100644 --- a/arch/arm/configs/s5pv210_defconfig +++ b/arch/arm/configs/s5pv210_defconfig @@ -113,7 +113,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig index ea7ddf640ba73e..e14720a9a5ac47 100644 --- a/arch/arm/configs/sama7_defconfig +++ b/arch/arm/configs/sama7_defconfig @@ -227,8 +227,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_DEV_ATMEL_AES=y 
CONFIG_CRYPTO_DEV_ATMEL_TDES=y CONFIG_CRYPTO_DEV_ATMEL_SHA=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_ITU_T=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=32 CONFIG_CMA_ALIGNMENT=9 diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig index ac5b7a5aaff680..ffec59e3f49c1d 100644 --- a/arch/arm/configs/spitz_defconfig +++ b/arch/arm/configs/spitz_defconfig @@ -234,7 +234,6 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRC_CCITT=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig index 423bb41c4225ff..dcd9c316072ead 100644 --- a/arch/arm/configs/stm32_defconfig +++ b/arch/arm/configs/stm32_defconfig @@ -74,7 +74,6 @@ CONFIG_EXT3_FS=y # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set CONFIG_NLS=y -CONFIG_CRC_ITU_T=y CONFIG_PRINTK_TIME=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/arm/configs/wpcm450_defconfig b/arch/arm/configs/wpcm450_defconfig index 5e4397f7f828cf..cd4b3e70ff688b 100644 --- a/arch/arm/configs/wpcm450_defconfig +++ b/arch/arm/configs/wpcm450_defconfig @@ -191,8 +191,6 @@ CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y CONFIG_PKCS7_MESSAGE_PARSER=y CONFIG_SYSTEM_TRUSTED_KEYRING=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_ITU_T=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig index 080019aa6fcd89..fcf287edd0e5e6 100644 --- a/arch/arm/mach-aspeed/Kconfig +++ b/arch/arm/mach-aspeed/Kconfig @@ -2,7 +2,6 @@ menuconfig ARCH_ASPEED bool "Aspeed BMC architectures" depends on (CPU_LITTLE_ENDIAN && ARCH_MULTI_V5) || ARCH_MULTI_V6 || ARCH_MULTI_V7 - select SRAM select WATCHDOG select ASPEED_WATCHDOG select MFD_SYSCON diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a182295e6f08bf..6527d0d5656a13 100644 --- a/arch/arm64/Kconfig +++ 
b/arch/arm64/Kconfig @@ -333,9 +333,9 @@ config ARCH_MMAP_RND_BITS_MAX default 24 if ARM64_VA_BITS=39 default 27 if ARM64_VA_BITS=42 default 30 if ARM64_VA_BITS=47 - default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES - default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES - default 33 if ARM64_VA_BITS=48 + default 29 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_64K_PAGES + default 31 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_16K_PAGES + default 33 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) default 14 if ARM64_64K_PAGES default 16 if ARM64_16K_PAGES default 18 diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi index f9f6fea03b7446..bd366389b2389d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi @@ -252,6 +252,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -267,6 +268,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -282,6 +284,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 13a0e63afeaf3d..2c64d834a2c4f7 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -152,28 +152,12 @@ vcc-pg-supply = <&reg_aldo1>; }; -&r_ir { - linux,rc-map-name = "rc-beelink-gs1"; - status = "okay"; -}; - -&r_pio { - /* - * FIXME: We can't add that supply for now since it would - * create a circular dependency between pinctrl, the regulator - * and the RSB Bus. 
- * - * vcc-pl-supply = <&reg_aldo1>; - */ - vcc-pm-supply = <&reg_aldo1>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; interrupt-controller; @@ -291,6 +275,22 @@ }; }; +&r_ir { + linux,rc-map-name = "rc-beelink-gs1"; + status = "okay"; +}; + +&r_pio { + /* + * PL0 and PL1 are used for PMIC I2C + * don't enable the pl-supply else + * it will fail at boot + * + * vcc-pl-supply = <&reg_aldo1>; + */ + vcc-pm-supply = <&reg_aldo1>; +}; + &spdif { pinctrl-names = "default"; pinctrl-0 = <&spdif_tx_pin>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts index ab87c3447cd782..f005072c68a167 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts @@ -176,16 +176,12 @@ vcc-pg-supply = <&reg_vcc_wifi_io>; }; -&r_ir { - status = "okay"; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; interrupt-controller; @@ -296,6 +292,10 @@ }; }; +&r_ir { + status = "okay"; +}; + &rtc { clocks = <&ext_osc32k>; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi index d05dc5d6e6b9f7..e34dbb9920216d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi @@ -113,20 +113,12 @@ vcc-pg-supply = <&reg_aldo1>; }; -&r_ir { - status = "okay"; -}; - -&r_pio { - vcc-pm-supply = <&reg_bldo3>; -}; - -&r_rsb { +&r_i2c { status = "okay"; - axp805: pmic@745 { + axp805: pmic@36 { compatible = "x-powers,axp805", "x-powers,axp806"; - reg = <0x745>; + reg = <0x36>; interrupt-parent = <&r_intc>; interrupts = ; 
interrupt-controller; @@ -241,6 +233,14 @@ }; }; +&r_ir { + status = "okay"; +}; + +&r_pio { + vcc-pm-supply = <&reg_bldo3>; +}; + &rtc { clocks = <&ext_osc32k>; }; diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index da9de4986660f2..5a72f0b64247d5 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -151,7 +151,7 @@ al,msi-num-spis = <160>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 8b6156b5af659f..dea60d136c2e3d 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -361,7 +361,7 @@ interrupt-parent = <&gic>; }; - io-fabric@fc000000 { + io-bus@fc000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index ab2b3f15ef1946..69834b49673d40 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -2313,7 +2313,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x19000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2325,7 +2325,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1a000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; @@ -2337,7 +2337,7 @@ "amlogic,meson8-pwm-v2"; reg = <0x0 0x1b000 0x0 0x20>; clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, 
the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; #pwm-cells = <3>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi index de35fa2d7a6de3..8e3e3354ed67a9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-dreambox.dtsi @@ -116,6 +116,10 @@ status = "okay"; }; +&clkc_audio { + status = "okay"; +}; + &frddr_a { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 8ebce7114a60b7..6c134592c7bb81 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -741,7 +741,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -752,14 +752,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 2dc2fdaecf9ff5..19b8a39de6a033 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -811,7 +811,7 @@ &pwm_ab { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; @@ -822,14 +822,14 @@ &pwm_cd { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + 
<0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; &pwm_ef { clocks = <&xtal>, - <>, /* unknown/untested, the datasheet calls it "vid_pll" */ + <0>, /* unknown/untested, the datasheet calls it "vid_pll" */ <&clkc CLKID_FCLK_DIV4>, <&clkc CLKID_FCLK_DIV3>; }; diff --git a/arch/arm64/boot/dts/apple/t8103-j293.dts b/arch/arm64/boot/dts/apple/t8103-j293.dts index 2dfe7b895b2bc0..e2d9439397f71a 100644 --- a/arch/arm64/boot/dts/apple/t8103-j293.dts +++ b/arch/arm64/boot/dts/apple/t8103-j293.dts @@ -77,6 +77,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. Mark the domain as always on until the driver can + * handle this. + */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; diff --git a/arch/arm64/boot/dts/apple/t8112-j493.dts b/arch/arm64/boot/dts/apple/t8112-j493.dts index 3d73f9ee2f46a3..be86d34c6696cb 100644 --- a/arch/arm64/boot/dts/apple/t8112-j493.dts +++ b/arch/arm64/boot/dts/apple/t8112-j493.dts @@ -40,6 +40,16 @@ }; }; +/* + * The driver depends on boot loader initialized state which resets when this + * power-domain is powered off. This happens on suspend or when the driver is + * missing during boot. Mark the domain as always on until the driver can + * handle this. 
+ */ +&ps_dispdfr_be { + apple,always-on; +}; + &display_dfr { status = "okay"; }; diff --git a/arch/arm64/boot/dts/arm/morello.dtsi b/arch/arm64/boot/dts/arm/morello.dtsi index 0bab0b3ea9693e..5bc1c725dc860b 100644 --- a/arch/arm64/boot/dts/arm/morello.dtsi +++ b/arch/arm64/boot/dts/arm/morello.dtsi @@ -44,7 +44,7 @@ next-level-cache = <&l2_0>; clocks = <&scmi_dvfs 0>; - l2_0: l2-cache-0 { + l2_0: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -53,13 +53,6 @@ cache-sets = <2048>; cache-unified; next-level-cache = <&l3_0>; - - l3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; - cache-size = <0x100000>; - cache-unified; - }; }; }; @@ -78,7 +71,7 @@ next-level-cache = <&l2_1>; clocks = <&scmi_dvfs 0>; - l2_1: l2-cache-1 { + l2_1: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -105,7 +98,7 @@ next-level-cache = <&l2_2>; clocks = <&scmi_dvfs 1>; - l2_2: l2-cache-2 { + l2_2: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -132,7 +125,7 @@ next-level-cache = <&l2_3>; clocks = <&scmi_dvfs 1>; - l2_3: l2-cache-3 { + l2_3: l2-cache { compatible = "cache"; cache-level = <2>; /* 8 ways set associative */ @@ -143,6 +136,13 @@ next-level-cache = <&l3_0>; }; }; + + l3_0: l3-cache { + compatible = "cache"; + cache-level = <3>; + cache-size = <0x100000>; + cache-unified; + }; }; firmware { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts index 97ff1ddd631888..734a75198f06e0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-kit.dts @@ -124,6 +124,7 @@ assigned-clock-parents = <&clk IMX8MM_AUDIO_PLL1_OUT>; assigned-clock-rates = <24576000>; #sound-dai-cells = <0>; + fsl,sai-mclk-direction-output; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi 
b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 62ed64663f4952..9ba0cb89fa24e0 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -233,6 +233,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi index 7251ad3a0017c8..b46566f3ce2056 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi @@ -144,6 +144,19 @@ startup-delay-us = <20000>; }; + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_vsel>; + gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>; + regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; + states = <1800000 0x1>, + <3300000 0x0>; + regulator-name = "PMIC_USDHC_VSELECT"; + vin-supply = <&reg_nvcc_sd>; + }; + reserved-memory { #address-cells = <2>; #size-cells = <2>; @@ -269,7 +282,7 @@ "SODIMM_19", "", "", - "", + "PMIC_USDHC_VSELECT", "", "", "", @@ -785,6 +798,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>; pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>; vmmc-supply = <&reg_usdhc2_vmmc>; + vqmmc-supply = <&reg_usdhc2_vqmmc>; }; &wdog1 { @@ -1206,13 +1220,17 @@ ; /* SODIMM 76 */ }; + pinctrl_usdhc2_vsel: usdhc2vselgrp { + fsl,pins = + ; /* PMIC_USDHC_VSELECT */ + }; + /* * Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the * on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here. 
*/ pinctrl_usdhc2: usdhc2grp { fsl,pins = - , , /* SODIMM 78 */ , /* SODIMM 74 */ , /* SODIMM 80 */ @@ -1223,7 +1241,6 @@ pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp { fsl,pins = - , , , , @@ -1234,7 +1251,6 @@ pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp { fsl,pins = - , , , , @@ -1246,7 +1262,6 @@ /* Avoid backfeeding with removed card power */ pinctrl_usdhc2_sleep: usdhc2slpgrp { fsl,pins = - , , , , diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts index 1df5ceb1138793..37fc5ed98d7f61 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-kit.dts @@ -124,6 +124,7 @@ assigned-clock-parents = <&clk IMX8MN_AUDIO_PLL1_OUT>; assigned-clock-rates = <24576000>; #sound-dai-cells = <0>; + fsl,sai-mclk-direction-output; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi index 2a64115eebf1c6..bb11590473a4c7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi @@ -242,6 +242,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi index 15f7ab58db36cc..88561df70d03ac 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-beacon-som.dtsi @@ -257,6 +257,7 @@ rtc: rtc@51 { compatible = "nxp,pcf85263"; reg = <0x51>; + quartz-load-femtofarads = <12500>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi index a1b75c9068b288..2ce1860b244d5e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-nominal.dtsi @@ -24,6 +24,20 @@ fsl,operating-mode = "nominal"; }; 
+&gpu2d { + assigned-clocks = <&clk IMX8MP_CLK_GPU2D_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>; +}; + +&gpu3d { + assigned-clocks = <&clk IMX8MP_CLK_GPU3D_CORE>, + <&clk IMX8MP_CLK_GPU3D_SHADER_CORE>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, <800000000>; +}; + &pgc_hdmimix { assigned-clocks = <&clk IMX8MP_CLK_HDMI_AXI>, <&clk IMX8MP_CLK_HDMI_APB>; @@ -46,6 +60,18 @@ assigned-clock-rates = <600000000>, <300000000>; }; +&pgc_mlmix { + assigned-clocks = <&clk IMX8MP_CLK_ML_CORE>, + <&clk IMX8MP_CLK_ML_AXI>, + <&clk IMX8MP_CLK_ML_AHB>; + assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>, + <&clk IMX8MP_SYS_PLL1_800M>; + assigned-clock-rates = <800000000>, + <800000000>, + <300000000>; +}; + &media_blk_ctrl { assigned-clocks = <&clk IMX8MP_CLK_MEDIA_AXI>, <&clk IMX8MP_CLK_MEDIA_APB>, @@ -62,3 +88,5 @@ <0>, <0>, <400000000>, <1039500000>; }; + +/delete-node/ &{noc_opp_table/opp-1000000000}; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi index b2ac2583a59292..b59da91fdd041f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-var-som.dtsi @@ -35,7 +35,6 @@ <0x1 0x00000000 0 0xc0000000>; }; - reg_usdhc2_vmmc: regulator-usdhc2-vmmc { compatible = "regulator-fixed"; regulator-name = "VSD_3V3"; @@ -46,6 +45,16 @@ startup-delay-us = <100>; off-on-delay-us = <12000>; }; + + reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc { + compatible = "regulator-gpio"; + regulator-name = "VSD_VSEL"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3300000>; + gpios = <&gpio2 12 GPIO_ACTIVE_HIGH>; + states = <3300000 0x0 1800000 0x1>; + vin-supply = <&ldo5>; + }; }; &A53_0 { @@ -205,6 +214,7 @@ pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_gpio>; cd-gpios = <&gpio1 14 
GPIO_ACTIVE_LOW>; vmmc-supply = <&reg_usdhc2_vmmc>; + vqmmc-supply = <&reg_usdhc2_vqmmc>; bus-width = <4>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ce6793b2d57eef..7c1c87eab54cc6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1645,6 +1645,12 @@ opp-hz = /bits/ 64 <200000000>; }; + /* Nominal drive mode maximum */ + opp-800000000 { + opp-hz = /bits/ 64 <800000000>; + }; + + /* Overdrive mode maximum */ opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; }; diff --git a/arch/arm64/boot/dts/freescale/imx95.dtsi b/arch/arm64/boot/dts/freescale/imx95.dtsi index 9bb26b466a061a..59f057ba6fa7ff 100644 --- a/arch/arm64/boot/dts/freescale/imx95.dtsi +++ b/arch/arm64/boot/dts/freescale/imx95.dtsi @@ -1626,7 +1626,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x60100000 0 0xfe00000>, <0 0x4c360000 0 0x10000>, - <0 0x4c340000 0 0x2000>; + <0 0x4c340000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0x0 0x00000000 0x0 0x6ff00000 0 0x00100000>, <0x82000000 0x0 0x10000000 0x9 0x10000000 0 0x10000000>; @@ -1673,7 +1673,7 @@ reg = <0 0x4c300000 0 0x10000>, <0 0x4c360000 0 0x1000>, <0 0x4c320000 0 0x1000>, - <0 0x4c340000 0 0x2000>, + <0 0x4c340000 0 0x4000>, <0 0x4c370000 0 0x10000>, <0x9 0 1 0>; reg-names = "dbi","atu", "dbi2", "app", "dma", "addr_space"; @@ -1700,7 +1700,7 @@ reg = <0 0x4c380000 0 0x10000>, <8 0x80100000 0 0xfe00000>, <0 0x4c3e0000 0 0x10000>, - <0 0x4c3c0000 0 0x2000>; + <0 0x4c3c0000 0 0x4000>; reg-names = "dbi", "config", "atu", "app"; ranges = <0x81000000 0 0x00000000 0x8 0x8ff00000 0 0x00100000>, <0x82000000 0 0x10000000 0xa 0x10000000 0 0x10000000>; @@ -1749,7 +1749,7 @@ reg = <0 0x4c380000 0 0x10000>, <0 0x4c3e0000 0 0x1000>, <0 0x4c3a0000 0 0x1000>, - <0 0x4c3c0000 0 0x2000>, + <0 0x4c3c0000 0 0x4000>, <0 0x4c3f0000 0 0x10000>, <0xa 0 1 0>; reg-names = "dbi", "atu", "dbi2", "app", "dma", "addr_space"; 
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi index 51c6e19e40b843..7d9394a0430272 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex5.dtsi @@ -222,9 +222,9 @@ status = "disabled"; }; - gpio0: gpio@ffc03200 { + gpio0: gpio@10c03200 { compatible = "snps,dw-apb-gpio"; - reg = <0xffc03200 0x100>; + reg = <0x10c03200 0x100>; #address-cells = <1>; #size-cells = <0>; resets = <&rst GPIO0_RESET>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi index 3a9b6907185d03..24282084570787 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi @@ -26,6 +26,8 @@ leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&spi_quad_pins>; led-power1 { label = "udpu:green:power"; @@ -82,8 +84,6 @@ &spi0 { status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&spi_quad_pins>; flash@0 { compatible = "jedec,spi-nor"; @@ -108,6 +108,10 @@ }; }; +&spi_quad_pins { + function = "gpio"; +}; + &pinctrl_nb { i2c2_recovery_pins: i2c2-recovery-pins { groups = "i2c2"; diff --git a/arch/arm64/boot/dts/mediatek/mt6357.dtsi b/arch/arm64/boot/dts/mediatek/mt6357.dtsi index 5fafa842d312f3..dca4e5c3d8e210 100644 --- a/arch/arm64/boot/dts/mediatek/mt6357.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6357.dtsi @@ -60,7 +60,6 @@ }; mt6357_vfe28_reg: ldo-vfe28 { - compatible = "regulator-fixed"; regulator-name = "vfe28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -75,7 +74,6 @@ }; mt6357_vrf18_reg: ldo-vrf18 { - compatible = "regulator-fixed"; regulator-name = "vrf18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -83,7 +81,6 @@ }; mt6357_vrf12_reg: ldo-vrf12 { - compatible = "regulator-fixed"; regulator-name = "vrf12"; regulator-min-microvolt = <1200000>; 
regulator-max-microvolt = <1200000>; @@ -112,7 +109,6 @@ }; mt6357_vcn28_reg: ldo-vcn28 { - compatible = "regulator-fixed"; regulator-name = "vcn28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -120,7 +116,6 @@ }; mt6357_vcn18_reg: ldo-vcn18 { - compatible = "regulator-fixed"; regulator-name = "vcn18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -142,7 +137,6 @@ }; mt6357_vcamio_reg: ldo-vcamio18 { - compatible = "regulator-fixed"; regulator-name = "vcamio"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -175,7 +169,6 @@ }; mt6357_vaux18_reg: ldo-vaux18 { - compatible = "regulator-fixed"; regulator-name = "vaux18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; @@ -183,7 +176,6 @@ }; mt6357_vaud28_reg: ldo-vaud28 { - compatible = "regulator-fixed"; regulator-name = "vaud28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -191,7 +183,6 @@ }; mt6357_vio28_reg: ldo-vio28 { - compatible = "regulator-fixed"; regulator-name = "vio28"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; @@ -199,7 +190,6 @@ }; mt6357_vio18_reg: ldo-vio18 { - compatible = "regulator-fixed"; regulator-name = "vio18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; diff --git a/arch/arm64/boot/dts/mediatek/mt6359.dtsi b/arch/arm64/boot/dts/mediatek/mt6359.dtsi index 7b10f9c59819a9..467d8a4c2aa7f1 100644 --- a/arch/arm64/boot/dts/mediatek/mt6359.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt6359.dtsi @@ -20,6 +20,8 @@ }; regulators { + compatible = "mediatek,mt6359-regulator"; + mt6359_vs1_buck_reg: buck_vs1 { regulator-name = "vs1"; regulator-min-microvolt = <800000>; @@ -298,7 +300,7 @@ }; }; - mt6359rtc: mt6359rtc { + mt6359rtc: rtc { compatible = "mediatek,mt6358-rtc"; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 
e1495f1900a7b4..f9ca6b3720e915 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -259,14 +259,10 @@ }; }; }; +}; - ports { - port { - dsi_out: endpoint { - remote-endpoint = <&panel_in>; - }; - }; - }; +&dsi_out { + remote-endpoint = <&panel_in>; }; &gic { diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 0aa34e5bbaaa87..3c1fe80e64b9c5 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1836,6 +1836,10 @@ phys = <&mipi_tx0>; phy-names = "dphy"; status = "disabled"; + + port { + dsi_out: endpoint { }; + }; }; dpi0: dpi@14015000 { diff --git a/arch/arm64/boot/dts/mediatek/mt8188.dtsi b/arch/arm64/boot/dts/mediatek/mt8188.dtsi index 69a8423d385890..29d35ca945973c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8188.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8188.dtsi @@ -2579,7 +2579,7 @@ reg = <0 0x1c002000 0 0x1000>; clocks = <&vdosys0 CLK_VDO0_DISP_RDMA0>; interrupts = ; - iommus = <&vdo_iommu M4U_PORT_L1_DISP_RDMA0>; + iommus = <&vpp_iommu M4U_PORT_L1_DISP_RDMA0>; power-domains = <&spm MT8188_POWER_DOMAIN_VDOSYS0>; mediatek,gce-client-reg = <&gce0 SUBSYS_1c00XXXX 0x2000 0x1000>; diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 4f2dc0a7556610..1ded4b3f87605f 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -617,22 +617,6 @@ #size-cells = <0>; #power-domain-cells = <1>; - power-domain@MT8195_POWER_DOMAIN_VDEC1 { - reg = ; - clocks = <&vdecsys CLK_VDEC_LARB1>; - clock-names = "vdec1-0"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; - }; - - power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { - reg = ; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; - clock-names = "venc1-larb"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; - }; - 
power-domain@MT8195_POWER_DOMAIN_VDOSYS0 { reg = ; clocks = <&topckgen CLK_TOP_CFG_VDO0>, @@ -678,15 +662,25 @@ clocks = <&vdecsys_soc CLK_VDEC_SOC_LARB1>; clock-names = "vdec0-0"; mediatek,infracfg = <&infracfg_ao>; + #address-cells = <1>; + #size-cells = <0>; #power-domain-cells = <0>; - }; - power-domain@MT8195_POWER_DOMAIN_VDEC2 { - reg = ; - clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>; - clock-names = "vdec2-0"; - mediatek,infracfg = <&infracfg_ao>; - #power-domain-cells = <0>; + power-domain@MT8195_POWER_DOMAIN_VDEC1 { + reg = ; + clocks = <&vdecsys CLK_VDEC_LARB1>; + clock-names = "vdec1-0"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; + + power-domain@MT8195_POWER_DOMAIN_VDEC2 { + reg = ; + clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>; + clock-names = "vdec2-0"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; }; power-domain@MT8195_POWER_DOMAIN_VENC { @@ -694,7 +688,17 @@ clocks = <&vencsys CLK_VENC_LARB>; clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; + #address-cells = <1>; + #size-cells = <0>; #power-domain-cells = <0>; + + power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { + reg = ; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; + mediatek,infracfg = <&infracfg_ao>; + #power-domain-cells = <0>; + }; }; power-domain@MT8195_POWER_DOMAIN_VDOSYS1 { diff --git a/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi b/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi index 60139e6dffd8e0..6a75b230282eda 100644 --- a/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8390-genio-common.dtsi @@ -1199,8 +1199,18 @@ }; &ssusb2 { + /* + * There is a single ssusb2 controller but two ports: one is routed + * to the M.2 slot, the other goes to the RPi header, which supports + * full OTG. 
+ * As the controller is shared between them, the role switch default + * mode is set to host to make any peripheral inserted in the M.2 + * slot (i.e BT/WIFI module) be detected when the other port is + * unused. + */ dr_mode = "otg"; maximum-speed = "high-speed"; + role-switch-default-mode = "host"; usb-role-switch; vusb33-supply = <&mt6359_vusb_ldo_reg>; wakeup-source; @@ -1211,7 +1221,7 @@ connector { compatible = "gpio-usb-b-connector", "usb-b-connector"; type = "micro"; - id-gpios = <&pio 89 GPIO_ACTIVE_HIGH>; + id-gpios = <&pio 89 GPIO_ACTIVE_LOW>; vbus-supply = <&usb_p2_vbus>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 2b3bb5d0af17bd..f0b7949df92c05 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -621,9 +621,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTB>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTB>; - reset-names = "serial"; status = "disabled"; }; @@ -633,9 +631,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTD>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTD>; - reset-names = "serial"; status = "disabled"; }; @@ -645,9 +641,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTE>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTE>; - reset-names = "serial"; status = "disabled"; }; @@ -657,9 +651,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTF>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTF>; - reset-names = "serial"; status = "disabled"; }; @@ -1236,9 +1228,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTC>; - clock-names = "serial"; resets = <&bpmp TEGRA186_RESET_UARTC>; - reset-names = "serial"; status = "disabled"; }; @@ -1248,9 +1238,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA186_CLK_UARTG>; - clock-names = "serial"; resets = <&bpmp 
TEGRA186_RESET_UARTG>; - reset-names = "serial"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 33f92b77cd9d9e..c3695077478514 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -766,9 +766,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTD>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTD>; - reset-names = "serial"; status = "disabled"; }; @@ -778,9 +776,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTE>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTE>; - reset-names = "serial"; status = "disabled"; }; @@ -790,9 +786,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTF>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTF>; - reset-names = "serial"; status = "disabled"; }; @@ -817,9 +811,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTH>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTH>; - reset-names = "serial"; status = "disabled"; }; @@ -1616,9 +1608,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTC>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTC>; - reset-names = "serial"; status = "disabled"; }; @@ -1628,9 +1618,7 @@ reg-shift = <2>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_UARTG>; - clock-names = "serial"; resets = <&bpmp TEGRA194_RESET_UARTG>; - reset-names = "serial"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index 9b9d1d15b0c7ea..1bb1f9640a800a 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -11,6 +11,7 @@ rtc0 = "/i2c@7000d000/pmic@3c"; rtc1 = "/rtc@7000e000"; serial0 = &uarta; + serial3 = &uartd; }; chosen { diff --git a/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi 
b/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi index ae12f069f26fa5..b24b795873d416 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574-rdp-common.dtsi @@ -111,6 +111,13 @@ regulator-always-on; regulator-boot-on; }; + + mp5496_l5: l5 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + regulator-boot-on; + }; }; }; @@ -146,7 +153,7 @@ }; &usb_0_qmpphy { - vdda-pll-supply = <&mp5496_l2>; + vdda-pll-supply = <&mp5496_l5>; vdda-phy-supply = <&regulator_fixed_0p925>; status = "okay"; @@ -154,7 +161,7 @@ &usb_0_qusbphy { vdd-supply = <&regulator_fixed_0p925>; - vdda-pll-supply = <&mp5496_l2>; + vdda-pll-supply = <&mp5496_l5>; vdda-phy-dpdm-supply = <&regulator_fixed_3p3>; status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 94229002897257..b790a6b288abb8 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -378,6 +378,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <1>; + qcom,num-ees = <4>; + num-channels = <16>; qcom,controlled-remotely; }; @@ -972,14 +974,14 @@ ranges = <0x01000000 0x0 0x00000000 0x18200000 0x0 0x100000>, <0x02000000 0x0 0x18300000 0x18300000 0x0 0x7d00000>; - interrupts = , - , - , - , - , - , - , - ; + interrupts = , + , + , + , + , + , + , + ; interrupt-names = "msi0", "msi1", "msi2", diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index c2caad85c668df..fa6769320a238c 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -2,6 +2,7 @@ /* Copyright (c) 2016, The Linux Foundation. All rights reserved. 
*/ #include +#include #include #include #include @@ -2790,11 +2791,11 @@ "gpll0_div"; clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>, <&gcc GCC_MMSS_GPLL0_CLK>, - <&mdss_dsi0_phy 1>, - <&mdss_dsi0_phy 0>, - <&mdss_dsi1_phy 1>, - <&mdss_dsi1_phy 0>, - <&mdss_hdmi_phy 0>, + <&mdss_dsi0_phy DSI_PIXEL_PLL_CLK>, + <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi1_phy DSI_PIXEL_PLL_CLK>, + <&mdss_dsi1_phy DSI_BYTE_PLL_CLK>, + <&mdss_hdmi_phy>, <0>, <0>, <&gcc GCC_MMSS_GPLL0_DIV_CLK>; @@ -2932,8 +2933,8 @@ "bus"; assigned-clocks = <&mmcc BYTE0_CLK_SRC>, <&mmcc PCLK0_CLK_SRC>; - assigned-clock-parents = <&mdss_dsi0_phy 0>, - <&mdss_dsi0_phy 1>; + assigned-clock-parents = <&mdss_dsi0_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi0_phy DSI_PIXEL_PLL_CLK>; operating-points-v2 = <&dsi_opp_table>; power-domains = <&rpmpd MSM8998_VDDCX>; @@ -3008,8 +3009,8 @@ "bus"; assigned-clocks = <&mmcc BYTE1_CLK_SRC>, <&mmcc PCLK1_CLK_SRC>; - assigned-clock-parents = <&mdss_dsi1_phy 0>, - <&mdss_dsi1_phy 1>; + assigned-clock-parents = <&mdss_dsi1_phy DSI_BYTE_PLL_CLK>, + <&mdss_dsi1_phy DSI_PIXEL_PLL_CLK>; operating-points-v2 = <&dsi_opp_table>; power-domains = <&rpmpd MSM8998_VDDCX>; diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi index f0746123e594d5..6e3e57dd02612f 100644 --- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi +++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi @@ -1073,7 +1073,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1092,7 +1092,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", 
"qup-config"; status = "disabled"; @@ -1137,7 +1137,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1184,7 +1184,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1231,7 +1231,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1278,7 +1278,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; @@ -1297,7 +1297,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; @@ -1342,7 +1342,7 @@ interconnects = <&qup_virt MASTER_QUP_CORE_0 RPM_ALWAYS_TAG &qup_virt SLAVE_QUP_CORE_0 RPM_ALWAYS_TAG>, <&bimc MASTER_APPSS_PROC RPM_ALWAYS_TAG - &config_noc MASTER_APPSS_PROC RPM_ALWAYS_TAG>; + &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>; interconnect-names = "qup-core", "qup-config"; #address-cells = <1>; 
diff --git a/arch/arm64/boot/dts/qcom/qcs615.dtsi b/arch/arm64/boot/dts/qcom/qcs615.dtsi index f4abfad474ea62..12065484904380 100644 --- a/arch/arm64/boot/dts/qcom/qcs615.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs615.dtsi @@ -1022,10 +1022,10 @@ "bus_aggr_clk", "iface_clk", "core_clk_unipro", - "core_clk_ice", "ref_clk", "tx_lane0_sync_clk", - "rx_lane0_sync_clk"; + "rx_lane0_sync_clk", + "ice_core_clk"; resets = <&gcc GCC_UFS_PHY_BCR>; reset-names = "rst"; @@ -1060,10 +1060,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <37500000>, - /bits/ 64 <75000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <75000000>; required-opps = <&rpmhpd_opp_low_svs>; }; @@ -1072,10 +1072,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <75000000>, - /bits/ 64 <150000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <150000000>; required-opps = <&rpmhpd_opp_svs>; }; @@ -1084,10 +1084,10 @@ /bits/ 64 <0>, /bits/ 64 <0>, /bits/ 64 <150000000>, - /bits/ 64 <300000000>, /bits/ 64 <0>, /bits/ 64 <0>, - /bits/ 64 <0>; + /bits/ 64 <0>, + /bits/ 64 <300000000>; required-opps = <&rpmhpd_opp_nom>; }; }; @@ -3304,7 +3304,6 @@ #interrupt-cells = <4>; #address-cells = <2>; #size-cells = <0>; - cell-index = <0>; qcom,channel = <0>; qcom,ee = <0>; }; diff --git a/arch/arm64/boot/dts/qcom/qcs8300.dtsi b/arch/arm64/boot/dts/qcom/qcs8300.dtsi index 4a057f7c0d9fae..13b1121cdf175b 100644 --- a/arch/arm64/boot/dts/qcom/qcs8300.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs8300.dtsi @@ -798,18 +798,6 @@ <&apps_smmu 0x481 0x00>; }; - crypto: crypto@1dfa000 { - compatible = "qcom,qcs8300-qce", "qcom,qce"; - reg = <0x0 0x01dfa000 0x0 0x6000>; - dmas = <&cryptobam 4>, <&cryptobam 5>; - dma-names = "rx", "tx"; - iommus = <&apps_smmu 0x480 0x00>, - <&apps_smmu 0x481 0x00>; - interconnects = <&aggre2_noc MASTER_CRYPTO_CORE0 QCOM_ICC_TAG_ALWAYS - &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; - interconnect-names = "memory"; - }; - ice: crypto@1d88000 { compatible = 
"qcom,qcs8300-inline-crypto-engine", "qcom,inline-crypto-engine"; diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 3394ae2d130034..2010b7988b6cc4 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -2413,22 +2413,13 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; iommus = <&apps_smmu 0x480 0x00>, <&apps_smmu 0x481 0x00>; }; - crypto: crypto@1dfa000 { - compatible = "qcom,sa8775p-qce", "qcom,qce"; - reg = <0x0 0x01dfa000 0x0 0x6000>; - dmas = <&cryptobam 4>, <&cryptobam 5>; - dma-names = "rx", "tx"; - iommus = <&apps_smmu 0x480 0x00>, - <&apps_smmu 0x481 0x00>; - interconnects = <&aggre2_noc MASTER_CRYPTO_CORE0 0 &mc_virt SLAVE_EBI1 0>; - interconnect-names = "memory"; - }; - stm: stm@4002000 { compatible = "arm,coresight-stm", "arm,primecell"; reg = <0x0 0x4002000 0x0 0x1000>, @@ -4903,15 +4894,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <1>; iommus = <&apps_smmu 0x2141 0x04a0>, - <&apps_smmu 0x2161 0x04a0>, - <&apps_smmu 0x2181 0x0400>, - <&apps_smmu 0x21c1 0x04a0>, - <&apps_smmu 0x21e1 0x04a0>, - <&apps_smmu 0x2541 0x04a0>, - <&apps_smmu 0x2561 0x04a0>, - <&apps_smmu 0x2581 0x0400>, - <&apps_smmu 0x25c1 0x04a0>, - <&apps_smmu 0x25e1 0x04a0>; + <&apps_smmu 0x2181 0x0400>; dma-coherent; }; @@ -4919,15 +4902,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <2>; iommus = <&apps_smmu 0x2142 0x04a0>, - <&apps_smmu 0x2162 0x04a0>, - <&apps_smmu 0x2182 0x0400>, - <&apps_smmu 0x21c2 0x04a0>, - <&apps_smmu 0x21e2 0x04a0>, - <&apps_smmu 0x2542 0x04a0>, - <&apps_smmu 0x2562 0x04a0>, - <&apps_smmu 0x2582 0x0400>, - <&apps_smmu 0x25c2 0x04a0>, - <&apps_smmu 0x25e2 0x04a0>; + <&apps_smmu 0x2182 0x0400>; dma-coherent; }; @@ -4935,15 +4910,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <3>; iommus = <&apps_smmu 0x2143 0x04a0>, - <&apps_smmu 0x2163 0x04a0>, - <&apps_smmu 0x2183 0x0400>, - <&apps_smmu 
0x21c3 0x04a0>, - <&apps_smmu 0x21e3 0x04a0>, - <&apps_smmu 0x2543 0x04a0>, - <&apps_smmu 0x2563 0x04a0>, - <&apps_smmu 0x2583 0x0400>, - <&apps_smmu 0x25c3 0x04a0>, - <&apps_smmu 0x25e3 0x04a0>; + <&apps_smmu 0x2183 0x0400>; dma-coherent; }; @@ -4951,15 +4918,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <4>; iommus = <&apps_smmu 0x2144 0x04a0>, - <&apps_smmu 0x2164 0x04a0>, - <&apps_smmu 0x2184 0x0400>, - <&apps_smmu 0x21c4 0x04a0>, - <&apps_smmu 0x21e4 0x04a0>, - <&apps_smmu 0x2544 0x04a0>, - <&apps_smmu 0x2564 0x04a0>, - <&apps_smmu 0x2584 0x0400>, - <&apps_smmu 0x25c4 0x04a0>, - <&apps_smmu 0x25e4 0x04a0>; + <&apps_smmu 0x2184 0x0400>; dma-coherent; }; @@ -4967,15 +4926,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <5>; iommus = <&apps_smmu 0x2145 0x04a0>, - <&apps_smmu 0x2165 0x04a0>, - <&apps_smmu 0x2185 0x0400>, - <&apps_smmu 0x21c5 0x04a0>, - <&apps_smmu 0x21e5 0x04a0>, - <&apps_smmu 0x2545 0x04a0>, - <&apps_smmu 0x2565 0x04a0>, - <&apps_smmu 0x2585 0x0400>, - <&apps_smmu 0x25c5 0x04a0>, - <&apps_smmu 0x25e5 0x04a0>; + <&apps_smmu 0x2185 0x0400>; dma-coherent; }; @@ -4983,15 +4934,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <6>; iommus = <&apps_smmu 0x2146 0x04a0>, - <&apps_smmu 0x2166 0x04a0>, - <&apps_smmu 0x2186 0x0400>, - <&apps_smmu 0x21c6 0x04a0>, - <&apps_smmu 0x21e6 0x04a0>, - <&apps_smmu 0x2546 0x04a0>, - <&apps_smmu 0x2566 0x04a0>, - <&apps_smmu 0x2586 0x0400>, - <&apps_smmu 0x25c6 0x04a0>, - <&apps_smmu 0x25e6 0x04a0>; + <&apps_smmu 0x2186 0x0400>; dma-coherent; }; @@ -4999,15 +4942,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <7>; iommus = <&apps_smmu 0x2147 0x04a0>, - <&apps_smmu 0x2167 0x04a0>, - <&apps_smmu 0x2187 0x0400>, - <&apps_smmu 0x21c7 0x04a0>, - <&apps_smmu 0x21e7 0x04a0>, - <&apps_smmu 0x2547 0x04a0>, - <&apps_smmu 0x2567 0x04a0>, - <&apps_smmu 0x2587 0x0400>, - <&apps_smmu 0x25c7 0x04a0>, - <&apps_smmu 0x25e7 0x04a0>; + <&apps_smmu 0x2187 0x0400>; dma-coherent; }; @@ -5015,15 +4950,7 @@ compatible = 
"qcom,fastrpc-compute-cb"; reg = <8>; iommus = <&apps_smmu 0x2148 0x04a0>, - <&apps_smmu 0x2168 0x04a0>, - <&apps_smmu 0x2188 0x0400>, - <&apps_smmu 0x21c8 0x04a0>, - <&apps_smmu 0x21e8 0x04a0>, - <&apps_smmu 0x2548 0x04a0>, - <&apps_smmu 0x2568 0x04a0>, - <&apps_smmu 0x2588 0x0400>, - <&apps_smmu 0x25c8 0x04a0>, - <&apps_smmu 0x25e8 0x04a0>; + <&apps_smmu 0x2188 0x0400>; dma-coherent; }; @@ -5031,31 +4958,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <9>; iommus = <&apps_smmu 0x2149 0x04a0>, - <&apps_smmu 0x2169 0x04a0>, - <&apps_smmu 0x2189 0x0400>, - <&apps_smmu 0x21c9 0x04a0>, - <&apps_smmu 0x21e9 0x04a0>, - <&apps_smmu 0x2549 0x04a0>, - <&apps_smmu 0x2569 0x04a0>, - <&apps_smmu 0x2589 0x0400>, - <&apps_smmu 0x25c9 0x04a0>, - <&apps_smmu 0x25e9 0x04a0>; - dma-coherent; - }; - - compute-cb@10 { - compatible = "qcom,fastrpc-compute-cb"; - reg = <10>; - iommus = <&apps_smmu 0x214a 0x04a0>, - <&apps_smmu 0x216a 0x04a0>, - <&apps_smmu 0x218a 0x0400>, - <&apps_smmu 0x21ca 0x04a0>, - <&apps_smmu 0x21ea 0x04a0>, - <&apps_smmu 0x254a 0x04a0>, - <&apps_smmu 0x256a 0x04a0>, - <&apps_smmu 0x258a 0x0400>, - <&apps_smmu 0x25ca 0x04a0>, - <&apps_smmu 0x25ea 0x04a0>; + <&apps_smmu 0x2189 0x0400>; dma-coherent; }; @@ -5063,15 +4966,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <11>; iommus = <&apps_smmu 0x214b 0x04a0>, - <&apps_smmu 0x216b 0x04a0>, - <&apps_smmu 0x218b 0x0400>, - <&apps_smmu 0x21cb 0x04a0>, - <&apps_smmu 0x21eb 0x04a0>, - <&apps_smmu 0x254b 0x04a0>, - <&apps_smmu 0x256b 0x04a0>, - <&apps_smmu 0x258b 0x0400>, - <&apps_smmu 0x25cb 0x04a0>, - <&apps_smmu 0x25eb 0x04a0>; + <&apps_smmu 0x218b 0x0400>; dma-coherent; }; }; @@ -5131,15 +5026,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <1>; iommus = <&apps_smmu 0x2941 0x04a0>, - <&apps_smmu 0x2961 0x04a0>, - <&apps_smmu 0x2981 0x0400>, - <&apps_smmu 0x29c1 0x04a0>, - <&apps_smmu 0x29e1 0x04a0>, - <&apps_smmu 0x2d41 0x04a0>, - <&apps_smmu 0x2d61 0x04a0>, - <&apps_smmu 0x2d81 0x0400>, - <&apps_smmu 0x2dc1 
0x04a0>, - <&apps_smmu 0x2de1 0x04a0>; + <&apps_smmu 0x2981 0x0400>; dma-coherent; }; @@ -5147,15 +5034,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <2>; iommus = <&apps_smmu 0x2942 0x04a0>, - <&apps_smmu 0x2962 0x04a0>, - <&apps_smmu 0x2982 0x0400>, - <&apps_smmu 0x29c2 0x04a0>, - <&apps_smmu 0x29e2 0x04a0>, - <&apps_smmu 0x2d42 0x04a0>, - <&apps_smmu 0x2d62 0x04a0>, - <&apps_smmu 0x2d82 0x0400>, - <&apps_smmu 0x2dc2 0x04a0>, - <&apps_smmu 0x2de2 0x04a0>; + <&apps_smmu 0x2982 0x0400>; dma-coherent; }; @@ -5163,15 +5042,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <3>; iommus = <&apps_smmu 0x2943 0x04a0>, - <&apps_smmu 0x2963 0x04a0>, - <&apps_smmu 0x2983 0x0400>, - <&apps_smmu 0x29c3 0x04a0>, - <&apps_smmu 0x29e3 0x04a0>, - <&apps_smmu 0x2d43 0x04a0>, - <&apps_smmu 0x2d63 0x04a0>, - <&apps_smmu 0x2d83 0x0400>, - <&apps_smmu 0x2dc3 0x04a0>, - <&apps_smmu 0x2de3 0x04a0>; + <&apps_smmu 0x2983 0x0400>; dma-coherent; }; @@ -5179,15 +5050,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <4>; iommus = <&apps_smmu 0x2944 0x04a0>, - <&apps_smmu 0x2964 0x04a0>, - <&apps_smmu 0x2984 0x0400>, - <&apps_smmu 0x29c4 0x04a0>, - <&apps_smmu 0x29e4 0x04a0>, - <&apps_smmu 0x2d44 0x04a0>, - <&apps_smmu 0x2d64 0x04a0>, - <&apps_smmu 0x2d84 0x0400>, - <&apps_smmu 0x2dc4 0x04a0>, - <&apps_smmu 0x2de4 0x04a0>; + <&apps_smmu 0x2984 0x0400>; dma-coherent; }; @@ -5195,15 +5058,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <5>; iommus = <&apps_smmu 0x2945 0x04a0>, - <&apps_smmu 0x2965 0x04a0>, - <&apps_smmu 0x2985 0x0400>, - <&apps_smmu 0x29c5 0x04a0>, - <&apps_smmu 0x29e5 0x04a0>, - <&apps_smmu 0x2d45 0x04a0>, - <&apps_smmu 0x2d65 0x04a0>, - <&apps_smmu 0x2d85 0x0400>, - <&apps_smmu 0x2dc5 0x04a0>, - <&apps_smmu 0x2de5 0x04a0>; + <&apps_smmu 0x2985 0x0400>; dma-coherent; }; @@ -5211,15 +5066,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <6>; iommus = <&apps_smmu 0x2946 0x04a0>, - <&apps_smmu 0x2966 0x04a0>, - <&apps_smmu 0x2986 0x0400>, - <&apps_smmu 0x29c6 0x04a0>, 
- <&apps_smmu 0x29e6 0x04a0>, - <&apps_smmu 0x2d46 0x04a0>, - <&apps_smmu 0x2d66 0x04a0>, - <&apps_smmu 0x2d86 0x0400>, - <&apps_smmu 0x2dc6 0x04a0>, - <&apps_smmu 0x2de6 0x04a0>; + <&apps_smmu 0x2986 0x0400>; dma-coherent; }; @@ -5227,15 +5074,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <7>; iommus = <&apps_smmu 0x2947 0x04a0>, - <&apps_smmu 0x2967 0x04a0>, - <&apps_smmu 0x2987 0x0400>, - <&apps_smmu 0x29c7 0x04a0>, - <&apps_smmu 0x29e7 0x04a0>, - <&apps_smmu 0x2d47 0x04a0>, - <&apps_smmu 0x2d67 0x04a0>, - <&apps_smmu 0x2d87 0x0400>, - <&apps_smmu 0x2dc7 0x04a0>, - <&apps_smmu 0x2de7 0x04a0>; + <&apps_smmu 0x2987 0x0400>; dma-coherent; }; @@ -5243,15 +5082,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <8>; iommus = <&apps_smmu 0x2948 0x04a0>, - <&apps_smmu 0x2968 0x04a0>, - <&apps_smmu 0x2988 0x0400>, - <&apps_smmu 0x29c8 0x04a0>, - <&apps_smmu 0x29e8 0x04a0>, - <&apps_smmu 0x2d48 0x04a0>, - <&apps_smmu 0x2d68 0x04a0>, - <&apps_smmu 0x2d88 0x0400>, - <&apps_smmu 0x2dc8 0x04a0>, - <&apps_smmu 0x2de8 0x04a0>; + <&apps_smmu 0x2988 0x0400>; dma-coherent; }; @@ -5259,15 +5090,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <9>; iommus = <&apps_smmu 0x2949 0x04a0>, - <&apps_smmu 0x2969 0x04a0>, - <&apps_smmu 0x2989 0x0400>, - <&apps_smmu 0x29c9 0x04a0>, - <&apps_smmu 0x29e9 0x04a0>, - <&apps_smmu 0x2d49 0x04a0>, - <&apps_smmu 0x2d69 0x04a0>, - <&apps_smmu 0x2d89 0x0400>, - <&apps_smmu 0x2dc9 0x04a0>, - <&apps_smmu 0x2de9 0x04a0>; + <&apps_smmu 0x2989 0x0400>; dma-coherent; }; @@ -5275,15 +5098,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <10>; iommus = <&apps_smmu 0x294a 0x04a0>, - <&apps_smmu 0x296a 0x04a0>, - <&apps_smmu 0x298a 0x0400>, - <&apps_smmu 0x29ca 0x04a0>, - <&apps_smmu 0x29ea 0x04a0>, - <&apps_smmu 0x2d4a 0x04a0>, - <&apps_smmu 0x2d6a 0x04a0>, - <&apps_smmu 0x2d8a 0x0400>, - <&apps_smmu 0x2dca 0x04a0>, - <&apps_smmu 0x2dea 0x04a0>; + <&apps_smmu 0x298a 0x0400>; dma-coherent; }; @@ -5291,15 +5106,7 @@ compatible = 
"qcom,fastrpc-compute-cb"; reg = <11>; iommus = <&apps_smmu 0x294b 0x04a0>, - <&apps_smmu 0x296b 0x04a0>, - <&apps_smmu 0x298b 0x0400>, - <&apps_smmu 0x29cb 0x04a0>, - <&apps_smmu 0x29eb 0x04a0>, - <&apps_smmu 0x2d4b 0x04a0>, - <&apps_smmu 0x2d6b 0x04a0>, - <&apps_smmu 0x2d8b 0x0400>, - <&apps_smmu 0x2dcb 0x04a0>, - <&apps_smmu 0x2deb 0x04a0>; + <&apps_smmu 0x298b 0x0400>; dma-coherent; }; @@ -5307,15 +5114,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <12>; iommus = <&apps_smmu 0x294c 0x04a0>, - <&apps_smmu 0x296c 0x04a0>, - <&apps_smmu 0x298c 0x0400>, - <&apps_smmu 0x29cc 0x04a0>, - <&apps_smmu 0x29ec 0x04a0>, - <&apps_smmu 0x2d4c 0x04a0>, - <&apps_smmu 0x2d6c 0x04a0>, - <&apps_smmu 0x2d8c 0x0400>, - <&apps_smmu 0x2dcc 0x04a0>, - <&apps_smmu 0x2dec 0x04a0>; + <&apps_smmu 0x298c 0x0400>; dma-coherent; }; @@ -5323,15 +5122,7 @@ compatible = "qcom,fastrpc-compute-cb"; reg = <13>; iommus = <&apps_smmu 0x294d 0x04a0>, - <&apps_smmu 0x296d 0x04a0>, - <&apps_smmu 0x298d 0x0400>, - <&apps_smmu 0x29Cd 0x04a0>, - <&apps_smmu 0x29ed 0x04a0>, - <&apps_smmu 0x2d4d 0x04a0>, - <&apps_smmu 0x2d6d 0x04a0>, - <&apps_smmu 0x2d8d 0x0400>, - <&apps_smmu 0x2dcd 0x04a0>, - <&apps_smmu 0x2ded 0x04a0>; + <&apps_smmu 0x298d 0x0400>; dma-coherent; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index f3190f408f4b2c..0f1ebd869ce315 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -1202,9 +1202,6 @@ "VA DMIC0", "MIC BIAS1", "VA DMIC1", "MIC BIAS1", "VA DMIC2", "MIC BIAS3", - "VA DMIC0", "VA MIC BIAS1", - "VA DMIC1", "VA MIC BIAS1", - "VA DMIC2", "VA MIC BIAS3", "TX SWR_ADC1", "ADC2_OUTPUT"; wcd-playback-dai-link { diff --git a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts index d402f4c85b11d1..ee696317f78cc3 100644 --- 
a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts +++ b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts @@ -175,6 +175,7 @@ * BAM DMA interconnects support is in place. */ /delete-property/ clocks; + /delete-property/ clock-names; }; &blsp1_uart2 { @@ -187,6 +188,7 @@ * BAM DMA interconnects support is in place. */ /delete-property/ clocks; + /delete-property/ clock-names; }; &blsp2_uart1 { diff --git a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts index 7167f75bced3fd..a9926ad6c6f9f5 100644 --- a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts +++ b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts @@ -107,6 +107,7 @@ status = "okay"; vdd-supply = <&vreg_l1b_0p925>; + vdda-pll-supply = <&vreg_l10a_1p8>; vdda-phy-dpdm-supply = <&vreg_l7b_3p125>; }; @@ -404,6 +405,8 @@ &sdhc_2 { status = "okay"; + cd-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>; + vmmc-supply = <&vreg_l5b_2p95>; vqmmc-supply = <&vreg_l2b_2p95>; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts index d37a433130b98f..5948b401165ce9 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-samsung-starqltechn.dts @@ -135,8 +135,6 @@ vdda_sp_sensor: vdda_ufs1_core: vdda_ufs2_core: - vdda_usb1_ss_core: - vdda_usb2_ss_core: vreg_l1a_0p875: ldo1 { regulator-min-microvolt = <880000>; regulator-max-microvolt = <880000>; @@ -157,6 +155,7 @@ regulator-initial-mode = ; }; + vdda_usb1_ss_core: vdd_wcss_cx: vdd_wcss_mx: vdda_wcss_pll: @@ -383,8 +382,8 @@ }; &sdhc_2 { - pinctrl-names = "default"; pinctrl-0 = <&sdc2_clk_state &sdc2_cmd_state &sdc2_data_state &sd_card_det_n_state>; + pinctrl-names = "default"; cd-gpios = <&tlmm 126 GPIO_ACTIVE_LOW>; vmmc-supply = <&vreg_l21a_2p95>; vqmmc-supply = <&vddpx_2>; @@ -418,16 +417,9 @@ status = "okay"; }; -&wifi { - vdd-0.8-cx-mx-supply = <&vreg_l5a_0p8>; - vdd-1.8-xo-supply = 
<&vreg_l7a_1p8>; - vdd-1.3-rfa-supply = <&vreg_l17a_1p3>; - vdd-3.3-ch0-supply = <&vreg_l25a_3p3>; - status = "okay"; -}; - &tlmm { - gpio-reserved-ranges = <0 4>, <27 4>, <81 4>, <85 4>; + gpio-reserved-ranges = <27 4>, /* SPI (eSE - embedded Secure Element) */ + <85 4>; /* SPI (fingerprint reader) */ sdc2_clk_state: sdc2-clk-state { pins = "sdc2_clk"; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index c2937b4d9f1802..68613ea7146c88 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -606,7 +606,7 @@ }; cpu7_opp9: opp-1747200000 { - opp-hz = /bits/ 64 <1708800000>; + opp-hz = /bits/ 64 <1747200000>; opp-peak-kBps = <5412000 42393600>; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 69da30f35baaab..a86d0067634e81 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -455,7 +455,7 @@ no-map; }; - pil_camera_mem: mmeory@85200000 { + pil_camera_mem: memory@85200000 { reg = <0x0 0x85200000 0x0 0x500000>; no-map; }; @@ -1806,11 +1806,11 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <16>; qcom,controlled-remotely; iommus = <&apps_smmu 0x594 0x0011>, <&apps_smmu 0x596 0x0011>; - /* FIXME: Probing BAM DMA causes some abort and system hang */ - status = "fail"; }; crypto: crypto@1dfa000 { @@ -1822,8 +1822,6 @@ <&apps_smmu 0x596 0x0011>; interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; interconnect-names = "memory"; - /* FIXME: dependency BAM DMA is disabled */ - status = "disabled"; }; ipa: ipa@1e40000 { diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 9c809fc5fa45a9..419df72cd04b0c 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -5283,6 +5283,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <16>; 
qcom,controlled-remotely; iommus = <&apps_smmu 0x584 0x11>, <&apps_smmu 0x588 0x0>, diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index eac8de4005d82f..65ebddd124e2a8 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -331,7 +331,8 @@ scm: scm { compatible = "qcom,scm-sm8550", "qcom,scm"; qcom,dload-mode = <&tcsr 0x19000>; - interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&aggre2_noc MASTER_CRYPTO QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; }; }; @@ -850,9 +851,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 0 QCOM_GPI_I2C>, <&gpi_dma2 1 0 QCOM_GPI_I2C>; @@ -868,9 +872,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi8_data_clk>, <&qup_spi8_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 
0 0 QCOM_GPI_SPI>, <&gpi_dma2 1 0 QCOM_GPI_SPI>; @@ -890,9 +897,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 1 QCOM_GPI_I2C>, <&gpi_dma2 1 1 QCOM_GPI_I2C>; @@ -908,9 +918,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi9_data_clk>, <&qup_spi9_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 1 QCOM_GPI_SPI>, <&gpi_dma2 1 1 QCOM_GPI_SPI>; @@ -930,9 +943,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + 
<&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 2 QCOM_GPI_I2C>, <&gpi_dma2 1 2 QCOM_GPI_I2C>; @@ -948,9 +964,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi10_data_clk>, <&qup_spi10_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 2 QCOM_GPI_SPI>, <&gpi_dma2 1 2 QCOM_GPI_SPI>; @@ -970,9 +989,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 3 QCOM_GPI_I2C>, <&gpi_dma2 1 3 QCOM_GPI_I2C>; @@ -988,9 +1010,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi11_data_clk>, <&qup_spi11_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = 
<&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 3 QCOM_GPI_I2C>, <&gpi_dma2 1 3 QCOM_GPI_I2C>; @@ -1010,9 +1035,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 4 QCOM_GPI_I2C>, <&gpi_dma2 1 4 QCOM_GPI_I2C>; @@ -1028,9 +1056,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi12_data_clk>, <&qup_spi12_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 4 QCOM_GPI_I2C>, <&gpi_dma2 1 4 QCOM_GPI_I2C>; @@ -1050,9 +1081,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 
0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 5 QCOM_GPI_I2C>, <&gpi_dma2 1 5 QCOM_GPI_I2C>; @@ -1068,9 +1102,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi13_data_clk>, <&qup_spi13_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 5 QCOM_GPI_SPI>, <&gpi_dma2 1 5 QCOM_GPI_SPI>; @@ -1088,8 +1125,10 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_uart14_default>, <&qup_uart14_cts_rts>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1104,9 +1143,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt 
MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 7 QCOM_GPI_I2C>, <&gpi_dma2 1 7 QCOM_GPI_I2C>; @@ -1122,9 +1164,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi15_data_clk>, <&qup_spi15_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_2 0 &clk_virt SLAVE_QUP_CORE_2 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_2 0>, - <&aggre2_noc MASTER_QUP_2 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_2 QCOM_ICC_TAG_ALWAYS>, + <&aggre2_noc MASTER_QUP_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma2 0 7 QCOM_GPI_SPI>, <&gpi_dma2 1 7 QCOM_GPI_SPI>; @@ -1156,8 +1201,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1173,8 +1220,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt 
SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1190,8 +1239,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1207,8 +1258,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1224,8 +1277,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1241,8 +1296,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 
&clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1258,8 +1315,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1275,8 +1334,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1292,8 +1353,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1309,8 +1372,10 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt 
MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_I2C 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_0 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_I2C QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config"; status = "disabled"; }; @@ -1347,7 +1412,8 @@ clocks = <&gcc GCC_QUPV3_WRAP_1_M_AHB_CLK>, <&gcc GCC_QUPV3_WRAP_1_S_AHB_CLK>; iommus = <&apps_smmu 0xa3 0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core"; dma-coherent; #address-cells = <2>; @@ -1364,9 +1430,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 0 QCOM_GPI_I2C>, <&gpi_dma1 1 0 QCOM_GPI_I2C>; @@ -1382,9 +1451,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi0_data_clk>, <&qup_spi0_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc 
SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 0 QCOM_GPI_SPI>, <&gpi_dma1 1 0 QCOM_GPI_SPI>; @@ -1404,9 +1476,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 1 QCOM_GPI_I2C>, <&gpi_dma1 1 1 QCOM_GPI_I2C>; @@ -1422,9 +1497,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi1_data_clk>, <&qup_spi1_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 1 QCOM_GPI_SPI>, <&gpi_dma1 1 1 QCOM_GPI_SPI>; @@ -1444,9 +1522,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = 
<&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 2 QCOM_GPI_I2C>, <&gpi_dma1 1 2 QCOM_GPI_I2C>; @@ -1462,9 +1543,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi2_data_clk>, <&qup_spi2_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 2 QCOM_GPI_SPI>, <&gpi_dma1 1 2 QCOM_GPI_SPI>; @@ -1484,9 +1568,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 3 QCOM_GPI_I2C>, <&gpi_dma1 1 3 QCOM_GPI_I2C>; @@ -1502,9 +1589,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi3_data_clk>, <&qup_spi3_cs>; - 
interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 3 QCOM_GPI_SPI>, <&gpi_dma1 1 3 QCOM_GPI_SPI>; @@ -1524,9 +1614,12 @@ interrupts = ; #address-cells = <1>; #size-cells = <0>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 4 QCOM_GPI_I2C>, <&gpi_dma1 1 4 QCOM_GPI_I2C>; @@ -1542,9 +1635,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi4_data_clk>, <&qup_spi4_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", 
"qup-config", "qup-memory"; dmas = <&gpi_dma1 0 4 QCOM_GPI_SPI>, <&gpi_dma1 1 4 QCOM_GPI_SPI>; @@ -1562,9 +1658,12 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_i2c5_data_clk>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 5 QCOM_GPI_I2C>, <&gpi_dma1 1 5 QCOM_GPI_I2C>; @@ -1582,9 +1681,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi5_data_clk>, <&qup_spi5_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 5 QCOM_GPI_SPI>, <&gpi_dma1 1 5 QCOM_GPI_SPI>; @@ -1602,9 +1704,12 @@ pinctrl-names = "default"; pinctrl-0 = <&qup_i2c6_data_clk>; interrupts = ; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc 
MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 6 QCOM_GPI_I2C>, <&gpi_dma1 1 6 QCOM_GPI_I2C>; @@ -1622,9 +1727,12 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&qup_spi6_data_clk>, <&qup_spi6_cs>; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>, - <&aggre1_noc MASTER_QUP_1 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>, + <&aggre1_noc MASTER_QUP_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "qup-core", "qup-config", "qup-memory"; dmas = <&gpi_dma1 0 6 QCOM_GPI_SPI>, <&gpi_dma1 1 6 QCOM_GPI_SPI>; @@ -1643,8 +1751,10 @@ pinctrl-0 = <&qup_uart7_default>; interrupts = ; interconnect-names = "qup-core", "qup-config"; - interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_QUP_1 0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS + &clk_virt SLAVE_QUP_CORE_1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_QUP_1 QCOM_ICC_TAG_ALWAYS>; status = "disabled"; }; }; @@ -1768,8 +1878,10 @@ "ddrss_sf_tbu", "noc_aggr"; - interconnects = <&pcie_noc MASTER_PCIE_0 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>; + interconnects = <&pcie_noc MASTER_PCIE_0 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &cnoc_main SLAVE_PCIE_0 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "pcie-mem", "cpu-pcie"; msi-map = <0x0 
&gic_its 0x1400 0x1>, @@ -1891,8 +2003,10 @@ assigned-clocks = <&gcc GCC_PCIE_1_AUX_CLK>; assigned-clock-rates = <19200000>; - interconnects = <&pcie_noc MASTER_PCIE_1 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>; + interconnects = <&pcie_noc MASTER_PCIE_1 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &cnoc_main SLAVE_PCIE_1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "pcie-mem", "cpu-pcie"; msi-map = <0x0 &gic_its 0x1480 0x1>, @@ -1957,6 +2071,8 @@ interrupts = ; #dma-cells = <1>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; iommus = <&apps_smmu 0x480 0x0>, <&apps_smmu 0x481 0x0>; @@ -1969,7 +2085,8 @@ dma-names = "rx", "tx"; iommus = <&apps_smmu 0x480 0x0>, <&apps_smmu 0x481 0x0>; - interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&aggre2_noc MASTER_CRYPTO QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "memory"; }; @@ -2013,8 +2130,10 @@ dma-coherent; operating-points-v2 = <&ufs_opp_table>; - interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>; + interconnects = <&aggre1_noc MASTER_UFS_MEM QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_UFS_MEM_CFG QCOM_ICC_TAG_ALWAYS>; interconnect-names = "ufs-ddr", "cpu-ufs"; clock-names = "core_clk", @@ -2314,8 +2433,10 @@ clocks = <&rpmhcc RPMH_IPA_CLK>; clock-names = "core"; - interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_IPA_CFG 0>; + interconnects = <&aggre2_noc MASTER_IPA QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_IPA_CFG QCOM_ICC_TAG_ALWAYS>; interconnect-names = "memory", 
"config"; @@ -2349,7 +2470,8 @@ <&rpmhpd RPMHPD_MSS>; power-domain-names = "cx", "mss"; - interconnects = <&mc_virt MASTER_LLCC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&mc_virt MASTER_LLCC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&mpss_mem>, <&q6_mpss_dtb_mem>, <&mpss_dsm_mem>; @@ -2390,7 +2512,8 @@ <&rpmhpd RPMHPD_LMX>; power-domain-names = "lcx", "lmx"; - interconnects = <&lpass_lpicx_noc MASTER_LPASS_PROC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&lpass_lpicx_noc MASTER_LPASS_PROC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&adspslpi_mem>, <&q6_adsp_dtb_mem>; @@ -2848,8 +2971,10 @@ power-domains = <&rpmhpd RPMHPD_CX>; operating-points-v2 = <&sdhc2_opp_table>; - interconnects = <&aggre2_noc MASTER_SDCC_2 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_SDCC_2 0>; + interconnects = <&aggre2_noc MASTER_SDCC_2 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc SLAVE_SDCC_2 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "sdhc-ddr", "cpu-sdhc"; bus-width = <4>; dma-coherent; @@ -3020,8 +3145,11 @@ power-domains = <&dispcc MDSS_GDSC>; - interconnects = <&mmss_noc MASTER_MDP 0 &mc_virt SLAVE_EBI1 0>; - interconnect-names = "mdp0-mem"; + interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", "cpu-cfg"; iommus = <&apps_smmu 0x1c00 0x2>; @@ -3493,8 +3621,10 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - interconnects = <&aggre1_noc MASTER_USB3_0 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>; + interconnects = <&aggre1_noc MASTER_USB3_0 QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ALWAYS + &config_noc 
SLAVE_USB3_0 QCOM_ICC_TAG_ALWAYS>; interconnect-names = "usb-ddr", "apps-usb"; status = "disabled"; @@ -4617,7 +4747,8 @@ compatible = "qcom,sm8550-llcc-bwmon", "qcom,sc7280-llcc-bwmon"; reg = <0 0x24091000 0 0x1000>; interrupts = ; - interconnects = <&mc_virt MASTER_LLCC 3 &mc_virt SLAVE_EBI1 3>; + interconnects = <&mc_virt MASTER_LLCC QCOM_ICC_TAG_ACTIVE_ONLY + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ACTIVE_ONLY>; operating-points-v2 = <&llcc_bwmon_opp_table>; @@ -4666,7 +4797,8 @@ compatible = "qcom,sm8550-cpu-bwmon", "qcom,sdm845-bwmon"; reg = <0 0x240b6400 0 0x600>; interrupts = ; - interconnects = <&gem_noc MASTER_APPSS_PROC 3 &gem_noc SLAVE_LLCC 3>; + interconnects = <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &gem_noc SLAVE_LLCC QCOM_ICC_TAG_ACTIVE_ONLY>; operating-points-v2 = <&cpu_bwmon_opp_table>; @@ -4750,7 +4882,8 @@ <&rpmhpd RPMHPD_NSP>; power-domain-names = "cx", "mxc", "nsp"; - interconnects = <&nsp_noc MASTER_CDSP_PROC 0 &mc_virt SLAVE_EBI1 0>; + interconnects = <&nsp_noc MASTER_CDSP_PROC QCOM_ICC_TAG_ALWAYS + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; memory-region = <&cdsp_mem>, <&q6_cdsp_dtb_mem>; diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi index 86684cb9a93256..76acce6754986a 100644 --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi @@ -159,13 +159,20 @@ power-domain-names = "psci"; enable-method = "psci"; - next-level-cache = <&l2_200>; + next-level-cache = <&l2_300>; capacity-dmips-mhz = <1792>; dynamic-power-coefficient = <238>; qcom,freq-domain = <&cpufreq_hw 3>; #cooling-cells = <2>; + + l2_300: l2-cache { + compatible = "cache"; + cache-level = <2>; + cache-unified; + next-level-cache = <&l3_0>; + }; }; cpu4: cpu@400 { @@ -460,7 +467,7 @@ cpu_pd2: power-domain-cpu2 { #power-domain-cells = <0>; power-domains = <&cluster_pd>; - domain-idle-states = <&silver_cpu_sleep_0>; + domain-idle-states = <&gold_cpu_sleep_0>; }; cpu_pd3: power-domain-cpu3 { @@ -2533,6 
+2540,8 @@ <&apps_smmu 0x481 0>; qcom,ee = <0>; + qcom,num-ees = <4>; + num-channels = <20>; qcom,controlled-remotely; }; @@ -3656,8 +3665,11 @@ resets = <&dispcc DISP_CC_MDSS_CORE_BCR>; interconnects = <&mmss_noc MASTER_MDP QCOM_ICC_TAG_ALWAYS - &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>; - interconnect-names = "mdp0-mem"; + &mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>, + <&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY + &config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>; + interconnect-names = "mdp0-mem", + "cpu-cfg"; power-domains = <&dispcc MDSS_GDSC>; @@ -6535,20 +6547,20 @@ trips { gpu0_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6568,20 +6580,20 @@ trips { gpu1_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6601,20 +6613,20 @@ trips { gpu2_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6634,20 +6646,20 @@ trips { gpu3_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { 
- temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6667,20 +6679,20 @@ trips { gpu4_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6700,20 +6712,20 @@ trips { gpu5_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6733,20 +6745,20 @@ trips { gpu6_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; @@ -6766,20 +6778,20 @@ trips { gpu7_alert0: trip-point0 { - temperature = <85000>; + temperature = <95000>; hysteresis = <1000>; type = "passive"; }; trip-point1 { - temperature = <90000>; + temperature = <110000>; hysteresis = <1000>; type = "hot"; }; trip-point2 { - temperature = <110000>; - hysteresis = <1000>; + temperature = <115000>; + hysteresis = <0>; type = "critical"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8750.dtsi b/arch/arm64/boot/dts/qcom/sm8750.dtsi index 3bbd7d18598ee0..e8bb587a7813f9 100644 --- a/arch/arm64/boot/dts/qcom/sm8750.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8750.dtsi @@ -233,53 +233,59 @@ cpu_pd0: power-domain-cpu0 { #power-domain-cells = <0>; - power-domains = 
<&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd1: power-domain-cpu1 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd2: power-domain-cpu2 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd3: power-domain-cpu3 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd4: power-domain-cpu4 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd5: power-domain-cpu5 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster0_pd>; domain-idle-states = <&cluster0_c4>; }; cpu_pd6: power-domain-cpu6 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster1_pd>; domain-idle-states = <&cluster1_c4>; }; cpu_pd7: power-domain-cpu7 { #power-domain-cells = <0>; - power-domains = <&cluster_pd>; + power-domains = <&cluster1_pd>; domain-idle-states = <&cluster1_c4>; }; - cluster_pd: power-domain-cluster { + cluster0_pd: power-domain-cluster0 { + #power-domain-cells = <0>; + domain-idle-states = <&cluster_cl5>; + power-domains = <&system_pd>; + }; + + cluster1_pd: power-domain-cluster1 { #power-domain-cells = <0>; domain-idle-states = <&cluster_cl5>; power-domains = <&system_pd>; @@ -987,7 +993,7 @@ interrupts = ; - clocks = <&gcc GCC_QUPV3_WRAP2_S5_CLK>; + clocks = <&gcc GCC_QUPV3_WRAP2_S6_CLK>; clock-names = "se"; interconnects = <&clk_virt MASTER_QUP_CORE_2 QCOM_ICC_TAG_ALWAYS diff --git a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts index 5e3970b26e2f95..3cfe42ec089141 100644 --- a/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts +++ 
b/arch/arm64/boot/dts/qcom/x1e001de-devkit.dts @@ -507,6 +507,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -528,6 +529,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { @@ -745,8 +747,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; @@ -788,6 +790,9 @@ reset-gpios = <&tlmm 185 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr2_default>; + pinctrl-names = "default"; + orientation-switch; retimer-switch; @@ -843,6 +848,9 @@ reset-gpios = <&pm8550_gpios 10 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr0_default>; + pinctrl-names = "default"; + retimer-switch; orientation-switch; @@ -898,6 +906,9 @@ reset-gpios = <&tlmm 176 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&rtmr1_default>; + pinctrl-names = "default"; + retimer-switch; orientation-switch; @@ -1016,9 +1027,22 @@ }; &pm8550_gpios { + rtmr0_default: rtmr0-reset-n-active-state { + pins = "gpio10"; + function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; + }; + usb0_3p3_reg_en: usb0-3p3-reg-en-state { pins = "gpio11"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; @@ -1026,6 +1050,10 @@ usb0_pwr_1p15_en: usb0-pwr-1p15-en-state { pins = "gpio8"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; @@ -1033,6 +1061,10 @@ usb0_1p8_reg_en: usb0-1p8-reg-en-state { pins = "gpio8"; function = "normal"; + power-source = <1>; /* 1.8 V */ + bias-disable; + input-disable; + output-enable; }; }; @@ -1203,6 +1235,20 @@ }; }; + rtmr1_default: rtmr1-reset-n-active-state { + pins 
= "gpio176"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + + rtmr2_default: rtmr2-reset-n-active-state { + pins = "gpio185"; + function = "gpio"; + drive-strength = <2>; + bias-disable; + }; + rtmr1_1p15_reg_en: rtmr1-1p15-reg-en-state { pins = "gpio188"; function = "gpio"; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts index 53781f9b13af3e..f53067463b7601 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-asus-vivobook-s15.dts @@ -330,8 +330,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts b/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts index 86e87f03b0ec61..90f588ed7d63d7 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-dell-xps13-9345.dts @@ -359,6 +359,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -380,6 +381,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l17b_2p5: ldo17 { diff --git a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts index cd860a246c450b..929da9ecddc47c 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-hp-omnibook-x14.dts @@ -633,6 +633,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -654,6 +655,7 @@ regulator-min-microvolt = <1800000>; 
regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { @@ -871,8 +873,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; @@ -1352,18 +1354,22 @@ status = "okay"; }; +&smb2360_0 { + status = "okay"; +}; + &smb2360_0_eusb2_repeater { vdd18-supply = <&vreg_l3d_1p8>; vdd3-supply = <&vreg_l2b_3p0>; +}; +&smb2360_1 { status = "okay"; }; &smb2360_1_eusb2_repeater { vdd18-supply = <&vreg_l3d_1p8>; vdd3-supply = <&vreg_l14b_3p0>; - - status = "okay"; }; &swr0 { diff --git a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts index a3d53f2ba2c3d0..744a66ae5bdc84 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-lenovo-yoga-slim7x.dts @@ -290,6 +290,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l14b_3p0: ldo14 { @@ -304,8 +305,8 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; - }; regulators-1 { @@ -508,8 +509,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi index 5867953c73564c..6a883fafe3c77a 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100-microsoft-romulus.dtsi @@ -510,6 +510,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; 
regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b: ldo13 { @@ -531,6 +532,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b: ldo16 { diff --git a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts index ec594628304a9a..f06f4547884683 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts +++ b/arch/arm64/boot/dts/qcom/x1e80100-qcp.dts @@ -437,6 +437,7 @@ regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l13b_3p0: ldo13 { @@ -458,6 +459,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-initial-mode = ; + regulator-always-on; }; vreg_l16b_2p9: ldo16 { @@ -675,8 +677,8 @@ vreg_l2j_1p2: ldo2 { regulator-name = "vreg_l2j_1p2"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1256000>; + regulator-max-microvolt = <1256000>; regulator-initial-mode = ; }; diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi index 4936fa5b98ff7a..607d32f68c3406 100644 --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi @@ -20,6 +20,7 @@ #include #include #include +#include / { interrupt-parent = <&intc>; @@ -3125,7 +3126,7 @@ device_type = "pci"; compatible = "qcom,pcie-x1e80100"; reg = <0x0 0x01bd0000 0x0 0x3000>, - <0x0 0x78000000 0x0 0xf1d>, + <0x0 0x78000000 0x0 0xf20>, <0x0 0x78000f40 0x0 0xa8>, <0x0 0x78001000 0x0 0x1000>, <0x0 0x78100000 0x0 0x100000>, @@ -4814,6 +4815,8 @@ snps,dis-u1-entry-quirk; snps,dis-u2-entry-quirk; + dma-coherent; + ports { #address-cells = <1>; #size-cells = <0>; @@ -8457,8 +8460,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8483,7 +8486,7 @@ }; cpu-critical { - 
temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8509,7 +8512,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8535,7 +8538,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8561,7 +8564,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8587,7 +8590,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8613,7 +8616,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8639,7 +8642,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8665,7 +8668,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8683,8 +8686,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8701,8 +8704,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8719,7 +8722,7 @@ }; mem-critical { - temperature = <125000>; + temperature = <115000>; hysteresis = <0>; type = "critical"; }; @@ -8727,15 +8730,19 @@ }; video-thermal { - polling-delay-passive = <250>; - thermal-sensors = <&tsens0 12>; trips { trip-point0 { - temperature = <125000>; + temperature = <90000>; + hysteresis = <2000>; + type = "hot"; + }; + + video-critical { + temperature = <115000>; hysteresis = <1000>; - type = "passive"; + type = "critical"; }; }; }; @@ -8751,8 +8758,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; 
}; @@ -8777,7 +8784,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8803,7 +8810,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8829,7 +8836,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8855,7 +8862,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8881,7 +8888,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8907,7 +8914,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8933,7 +8940,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8959,7 +8966,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -8977,8 +8984,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -8995,8 +9002,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9013,8 +9020,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9039,7 +9046,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9065,7 +9072,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9091,7 +9098,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9117,7 +9124,7 @@ 
}; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9143,7 +9150,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9169,7 +9176,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9195,7 +9202,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9221,7 +9228,7 @@ }; cpu-critical { - temperature = <110000>; + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9239,8 +9246,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9257,8 +9264,8 @@ }; cpuss2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9275,8 +9282,8 @@ }; aoss0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9293,8 +9300,8 @@ }; nsp0-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9311,8 +9318,8 @@ }; nsp1-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9329,8 +9336,8 @@ }; nsp2-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9347,33 +9354,34 @@ }; nsp3-critical { - temperature = <125000>; - hysteresis = <0>; + temperature = <115000>; + hysteresis = <1000>; type = "critical"; }; }; }; gpuss-0-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 5>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + 
cooling-maps { + map0 { + trip = <&gpuss0_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss0_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9381,25 +9389,26 @@ }; gpuss-1-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 6>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss1_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss1_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9407,25 +9416,26 @@ }; gpuss-2-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 7>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss2_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss2_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9433,25 +9443,26 @@ }; gpuss-3-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 8>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = 
<&gpuss3_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss3_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9459,25 +9470,26 @@ }; gpuss-4-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 9>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss4_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss4_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9485,25 +9497,26 @@ }; gpuss-5-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 10>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss5_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss5_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9511,25 +9524,26 @@ }; gpuss-6-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 11>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss6_alert0>; + cooling-device = 
<&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss6_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9537,25 +9551,26 @@ }; gpuss-7-thermal { - polling-delay-passive = <10>; + polling-delay-passive = <200>; thermal-sensors = <&tsens3 12>; - trips { - trip-point0 { - temperature = <85000>; - hysteresis = <1000>; - type = "passive"; + cooling-maps { + map0 { + trip = <&gpuss7_alert0>; + cooling-device = <&gpu THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; }; + }; - trip-point1 { - temperature = <90000>; + trips { + gpuss7_alert0: trip-point0 { + temperature = <95000>; hysteresis = <1000>; - type = "hot"; + type = "passive"; }; - trip-point2 { - temperature = <125000>; + gpu-critical { + temperature = <115000>; hysteresis = <1000>; type = "critical"; }; @@ -9574,7 +9589,7 @@ camera0-critical { temperature = <115000>; - hysteresis = <0>; + hysteresis = <1000>; type = "critical"; }; }; @@ -9592,7 +9607,7 @@ camera0-critical { temperature = <115000>; - hysteresis = <0>; + hysteresis = <1000>; type = "critical"; }; }; diff --git a/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso b/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso index c27b9b3d4e5f4a..f2d53e958da116 100644 --- a/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso +++ b/arch/arm64/boot/dts/renesas/white-hawk-ard-audio-da7212.dtso @@ -108,7 +108,7 @@ }; tpu0_pins: tpu0 { - groups = "tpu_to0_a"; + groups = "tpu_to0_b"; function = "tpu"; }; }; diff --git a/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi b/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi index 20e8232f2f3234..976a3ab44e5a52 100644 --- a/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi +++ b/arch/arm64/boot/dts/renesas/white-hawk-single.dtsi @@ -11,6 +11,10 @@ / { model = 
"Renesas White Hawk Single board"; compatible = "renesas,white-hawk-single"; + + aliases { + ethernet3 = &tsn0; + }; }; &hscif0 { @@ -53,7 +57,7 @@ pinctrl-0 = <&tsn0_pins>; pinctrl-names = "default"; phy-mode = "rgmii"; - phy-handle = <&phy3>; + phy-handle = <&tsn0_phy>; status = "okay"; mdio { @@ -63,7 +67,7 @@ reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>; reset-post-delay-us = <4000>; - phy3: ethernet-phy@0 { + tsn0_phy: ethernet-phy@0 { compatible = "ethernet-phy-id002b.0980", "ethernet-phy-ieee802.3-c22"; reg = <0>; diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi index 1edfd643b25ae8..a334ef0629d1bb 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-common.dtsi @@ -31,7 +31,7 @@ }; vcc3v3_btreg: vcc3v3-btreg { - compatible = "regulator-gpio"; + compatible = "regulator-fixed"; enable-active-high; pinctrl-names = "default"; pinctrl-0 = <&bt_enable_h>; @@ -39,7 +39,6 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-always-on; - states = <3300000 0x0>; }; vcc3v3_rf_aux_mod: regulator-vcc3v3-rf-aux-mod { diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi index 80db778c968483..b60e68faa83aa9 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-ctouch2.dtsi @@ -26,5 +26,5 @@ }; &vcc3v3_btreg { - enable-gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC3 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts index 165d09ccb94244..5886b802c5202b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts +++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core-edimm2.2.dts @@ -39,5 +39,5 @@ }; &vcc3v3_btreg { - 
enable-gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; + gpios = <&gpio1 RK_PC2 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index f2234dabd66411..70979079923c10 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -312,14 +312,6 @@ status = "okay"; }; -&usb_host0_ehci { - status = "okay"; -}; - -&usb_host0_ohci { - status = "okay"; -}; - &vopb { status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index e00fbaa8acc168..587e89d7fc5e42 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -60,16 +60,6 @@ vin-supply = <&vcc5v0_sys>; }; - vcc5v0_host: regulator-vcc5v0-host { - compatible = "regulator-fixed"; - gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>; - pinctrl-names = "default"; - pinctrl-0 = <&vcc5v0_host_en>; - regulator-name = "vcc5v0_host"; - regulator-always-on; - vin-supply = <&vcc5v0_sys>; - }; - vcc5v0_sys: regulator-vcc5v0-sys { compatible = "regulator-fixed"; regulator-name = "vcc5v0_sys"; @@ -527,10 +517,10 @@ }; }; - usb2 { - vcc5v0_host_en: vcc5v0-host-en { + usb { + cy3304_reset: cy3304-reset { rockchip,pins = - <4 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>; + <4 RK_PA3 RK_FUNC_GPIO &pcfg_output_high>; }; }; @@ -595,11 +585,6 @@ u2phy1_otg: otg-port { status = "okay"; }; - - u2phy1_host: host-port { - phy-supply = <&vcc5v0_host>; - status = "okay"; - }; }; &usbdrd3_1 { @@ -609,12 +594,27 @@ &usbdrd_dwc3_1 { status = "okay"; dr_mode = "host"; -}; + pinctrl-names = "default"; + pinctrl-0 = <&cy3304_reset>; + #address-cells = <1>; + #size-cells = <0>; + + hub_2_0: hub@1 { + compatible = "usb4b4,6502", "usb4b4,6506"; + reg = <1>; + peer-hub = <&hub_3_0>; + reset-gpios = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + vdd-supply = <&vcc1v2_phy>; + vdd2-supply = <&vcc3v3_sys>; 
-&usb_host1_ehci { - status = "okay"; -}; + }; -&usb_host1_ohci { - status = "okay"; + hub_3_0: hub@2 { + compatible = "usb4b4,6500", "usb4b4,6504"; + reg = <2>; + peer-hub = <&hub_2_0>; + reset-gpios = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + vdd-supply = <&vcc1v2_phy>; + vdd2-supply = <&vcc3v3_sys>; + }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 541dca12bf1a1f..046dbe32901786 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -43,7 +43,7 @@ sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; clocks = <&rk808 1>; - clock-names = "lpo"; + clock-names = "ext_clock"; pinctrl-names = "default"; pinctrl-0 = <&wifi_enable_h>; reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/rockchip/rk3528.dtsi b/arch/arm64/boot/dts/rockchip/rk3528.dtsi index 26c3559d6a6deb..7f1ffd6003f581 100644 --- a/arch/arm64/boot/dts/rockchip/rk3528.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3528.dtsi @@ -404,9 +404,10 @@ uart3: serial@ffa08000 { compatible = "rockchip,rk3528-uart", "snps,dw-apb-uart"; + reg = <0x0 0xffa08000 0x0 0x100>; clocks = <&cru SCLK_UART3>, <&cru PCLK_UART3>; clock-names = "baudclk", "apb_pclk"; - reg = <0x0 0xffa08000 0x0 0x100>; + interrupts = ; reg-io-width = <4>; reg-shift = <2>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi index a483514717640f..e7ba477e75f9bf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-bigtreetech-cb2.dtsi @@ -775,7 +775,7 @@ rockchip,default-sample-phase = <90>; status = "okay"; - sdio-wifi@1 { + wifi@1 { compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts b/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts index 
53e71528e4c4c7..6224d72813e593 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-rock-3c.dts @@ -636,6 +636,7 @@ spi-max-frequency = <104000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <1>; + vcc-supply = <&vcc_1v8>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi index 00c479aa18711a..a28b4af10d13a2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi @@ -486,9 +486,12 @@ &sdhci { bus-width = <8>; max-frequency = <200000000>; + mmc-hs200-1_8v; non-removable; pinctrl-names = "default"; - pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd>; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd &emmc_datastrobe>; + vmmc-supply = <&vcc_3v3>; + vqmmc-supply = <&vcc_1v8>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts index 7bd32d230ad2f1..b80d628c426b71 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-qnap-ts433.dts @@ -619,6 +619,8 @@ bus-width = <8>; max-frequency = <200000000>; non-removable; + pinctrl-names = "default"; + pinctrl-0 = <&emmc_bus8 &emmc_clk &emmc_cmd &emmc_datastrobe>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts index 828bde7fab68dc..314067ba6f3c4f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3576-armsom-sige5.dts @@ -610,7 +610,7 @@ reg = <0x51>; clock-output-names = "hym8563"; interrupt-parent = <&gpio0>; - interrupts = ; + interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; wakeup-source; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index ebb5fc8bb8b136..3824242f8ae88a 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -1364,6 +1364,7 @@ interrupts = ; clocks = <&cru SCLK_FSPI1_X2>, <&cru HCLK_FSPI1>; clock-names = "clk_sfc", "hclk_sfc"; + power-domains = <&power RK3576_PD_SDGMAC>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -1414,6 +1415,7 @@ interrupts = ; clocks = <&cru SCLK_FSPI_X2>, <&cru HCLK_FSPI>; clock-names = "clk_sfc", "hclk_sfc"; + power-domains = <&power RK3576_PD_NVM>; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 1e18ad93ba0ebd..c52af310c7062e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -439,16 +439,15 @@ #clock-cells = <0>; }; - pmu_sram: sram@10f000 { - compatible = "mmio-sram"; - reg = <0x0 0x0010f000 0x0 0x100>; - ranges = <0 0x0 0x0010f000 0x100>; - #address-cells = <1>; - #size-cells = <1>; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; - scmi_shmem: sram@0 { + scmi_shmem: shmem@10f000 { compatible = "arm,scmi-shmem"; - reg = <0x0 0x100>; + reg = <0x0 0x0010f000 0x0 0x100>; + no-map; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi index 1af0a30866f619..af431fdcbea7a6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-friendlyelec-cm3588.dtsi @@ -222,6 +222,10 @@ compatible = "realtek,rt5616"; reg = <0x1b>; #sound-dai-cells = <0>; + assigned-clocks = <&cru I2S0_8CH_MCLKOUT>; + assigned-clock-rates = <12288000>; + clocks = <&cru I2S0_8CH_MCLKOUT>; + clock-names = "mclk"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 711ac4f2c7cb66..60ad272982ad51 100644 --- 
a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -214,6 +214,8 @@ }; &package_thermal { + polling-delay = <1000>; + trips { package_active1: trip-active1 { temperature = <45000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi index bce72bac4503b5..3045cb3bd68c63 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi @@ -11,20 +11,15 @@ compatible = "operating-points-v2"; opp-shared; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <750000 750000 950000>; clock-latency-ns = <40000>; opp-suspend; }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; - opp-microvolt = <887500 887500 950000>; - clock-latency-ns = <40000>; - }; - opp-1704000000 { - opp-hz = /bits/ 64 <1704000000>; - opp-microvolt = <937500 937500 950000>; + opp-1296000000 { + opp-hz = /bits/ 64 <1296000000>; + opp-microvolt = <775000 775000 950000>; clock-latency-ns = <40000>; }; }; @@ -33,9 +28,14 @@ compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -43,25 +43,20 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; cluster2_opp_table: opp-table-cluster2 { compatible = "operating-points-v2"; opp-shared; + opp-1200000000{ + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt = 
<750000 750000 950000>; + clock-latency-ns = <40000>; + }; opp-1416000000 { opp-hz = /bits/ 64 <1416000000>; - opp-microvolt = <750000 750000 950000>; + opp-microvolt = <762500 762500 950000>; clock-latency-ns = <40000>; }; opp-1608000000 { @@ -69,16 +64,6 @@ opp-microvolt = <787500 787500 950000>; clock-latency-ns = <40000>; }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; - opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; - opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; }; gpu_opp_table: opp-table { @@ -104,10 +89,6 @@ opp-hz = /bits/ 64 <700000000>; opp-microvolt = <750000 750000 850000>; }; - opp-850000000 { - opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <787500 787500 850000>; - }; }; }; diff --git a/arch/arm64/boot/dts/st/stm32mp211.dtsi b/arch/arm64/boot/dts/st/stm32mp211.dtsi index 6dd1377f3e1d8b..bf888d60cd4f01 100644 --- a/arch/arm64/boot/dts/st/stm32mp211.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp211.dtsi @@ -116,11 +116,11 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x0 0x1000>, - <0x4ac20000 0x0 0x2000>, - <0x4ac40000 0x0 0x2000>, - <0x4ac60000 0x0 0x2000>; + <0x4ac20000 0x0 0x20000>, + <0x4ac40000 0x0 0x20000>, + <0x4ac60000 0x0 0x20000>; #interrupt-cells = <3>; interrupt-controller; }; diff --git a/arch/arm64/boot/dts/st/stm32mp231.dtsi b/arch/arm64/boot/dts/st/stm32mp231.dtsi index 8820d219a33e6e..75697acd1345b2 100644 --- a/arch/arm64/boot/dts/st/stm32mp231.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp231.dtsi @@ -1201,13 +1201,12 @@ }; intc: interrupt-controller@4ac10000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; reg = <0x4ac10000 0x1000>, - <0x4ac20000 0x2000>, - <0x4ac40000 0x2000>, - <0x4ac60000 0x2000>; + <0x4ac20000 0x20000>, + <0x4ac40000 0x20000>, + <0x4ac60000 0x20000>; #interrupt-cells = <3>; - #address-cells = <1>; 
interrupt-controller; }; }; diff --git a/arch/arm64/boot/dts/st/stm32mp251.dtsi b/arch/arm64/boot/dts/st/stm32mp251.dtsi index f3c6cdfd7008c5..87110f91e4895a 100644 --- a/arch/arm64/boot/dts/st/stm32mp251.dtsi +++ b/arch/arm64/boot/dts/st/stm32mp251.dtsi @@ -115,14 +115,13 @@ }; intc: interrupt-controller@4ac00000 { - compatible = "arm,cortex-a7-gic"; + compatible = "arm,gic-400"; #interrupt-cells = <3>; - #address-cells = <1>; interrupt-controller; reg = <0x0 0x4ac10000 0x0 0x1000>, - <0x0 0x4ac20000 0x0 0x2000>, - <0x0 0x4ac40000 0x0 0x2000>, - <0x0 0x4ac60000 0x0 0x2000>; + <0x0 0x4ac20000 0x0 0x20000>, + <0x0 0x4ac40000 0x0 0x20000>, + <0x0 0x4ac60000 0x0 0x20000>; }; psci { diff --git a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi index 7d355aa73ea211..0c286f600296cd 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi @@ -552,8 +552,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 5>, <&k3_clks 57 6>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 6>; - assigned-clock-parents = <&k3_clks 57 8>; bus-width = <8>; mmc-ddr-1_8v; mmc-hs200-1_8v; diff --git a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi index a1daba7b1fad5d..455ccc770f16a1 100644 --- a/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62a-main.dtsi @@ -575,8 +575,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 5>, <&k3_clks 57 6>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 6>; - assigned-clock-parents = <&k3_clks 57 8>; bus-width = <8>; mmc-hs200-1_8v; ti,clkbuf-sel = <0x7>; diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi index 6e3beb5c2e010e..f9b5c97518d68f 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi +++ 
b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-main.dtsi @@ -564,8 +564,6 @@ power-domains = <&k3_pds 57 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 57 1>, <&k3_clks 57 2>; clock-names = "clk_ahb", "clk_xin"; - assigned-clocks = <&k3_clks 57 2>; - assigned-clock-parents = <&k3_clks 57 4>; bus-width = <8>; mmc-ddr-1_8v; mmc-hs200-1_8v; diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso index 76ca02127f95ff..dd090813a32d61 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-imx219.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; @@ -39,7 +39,6 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; reset-gpios = <&exp1 13 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso index ccc7f5e43184fa..7fc7c95f5cd578 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-ov5640.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso index 4eaf9d757dd0ad..b6bfdfbbdd984a 100644 --- a/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso +++ b/arch/arm64/boot/dts/ti/k3-am62x-sk-csi2-tevi-ov5640.dtso @@ -22,7 +22,7 @@ #size-cells = <0>; status = "okay"; - i2c-switch@71 { + i2c-mux@71 { compatible = "nxp,pca9543"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 94a812a1355baf..5ebf7ada6e4851 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ 
b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -449,6 +449,8 @@ ti,otap-del-sel-mmc-hs = <0x0>; ti,otap-del-sel-ddr52 = <0x5>; ti,otap-del-sel-hs200 = <0x5>; + ti,itap-del-sel-legacy = <0xa>; + ti,itap-del-sel-mmc-hs = <0x1>; ti,itap-del-sel-ddr52 = <0x0>; dma-coherent; status = "disabled"; diff --git a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts index 11522b36e0cece..5fa70a874d7b4d 100644 --- a/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts +++ b/arch/arm64/boot/dts/ti/k3-am68-sk-base-board.dts @@ -44,6 +44,17 @@ regulator-boot-on; }; + vsys_5v0: regulator-vsys5v0 { + /* Output of LM61460 */ + compatible = "regulator-fixed"; + regulator-name = "vsys_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&vusb_main>; + regulator-always-on; + regulator-boot-on; + }; + vsys_3v3: regulator-vsys3v3 { /* Output of LM5141 */ compatible = "regulator-fixed"; @@ -76,7 +87,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; - vin-supply = <&vsys_3v3>; + vin-supply = <&vsys_5v0>; gpios = <&main_gpio0 49 GPIO_ACTIVE_HIGH>; states = <1800000 0x0>, <3300000 0x1>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts index 4421852161dd65..da4e0cacd6d72d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-common-proc-board.dts @@ -573,6 +573,7 @@ &ospi1 { pinctrl-names = "default"; pinctrl-0 = <&mcu_fss0_ospi1_pins_default>; + status = "okay"; flash@0 { compatible = "jedec,spi-nor"; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso index 47bb5480b5b006..4eb3cffab0321d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk-csi2-dual-imx219.dtso @@ -19,6 +19,33 @@ #clock-cells = <0>; 
clock-frequency = <24000000>; }; + + reg_2p8v: regulator-2p8v { + compatible = "regulator-fixed"; + regulator-name = "2P8V"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; + + reg_1p8v: regulator-1p8v { + compatible = "regulator-fixed"; + regulator-name = "1P8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; + + reg_1p2v: regulator-1p2v { + compatible = "regulator-fixed"; + regulator-name = "1P2V"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + vin-supply = <&vdd_sd_dv>; + regulator-always-on; + }; }; &csi_mux { @@ -34,7 +61,9 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; + VANA-supply = <®_2p8v>; + VDIG-supply = <®_1p8v>; + VDDL-supply = <®_1p2v>; port { csi2_cam0: endpoint { @@ -56,7 +85,9 @@ reg = <0x10>; clocks = <&clk_imx219_fixed>; - clock-names = "xclk"; + VANA-supply = <®_2p8v>; + VDIG-supply = <®_1p8v>; + VDDL-supply = <®_1p2v>; port { csi2_cam1: endpoint { diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts index 440ef57be2943c..ffef3d1cfd5532 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts +++ b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts @@ -184,6 +184,17 @@ regulator-boot-on; }; + vsys_5v0: fixedregulator-vsys5v0 { + /* Output of LM61460 */ + compatible = "regulator-fixed"; + regulator-name = "vsys_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&vusb_main>; + regulator-always-on; + regulator-boot-on; + }; + vdd_mmc1: fixedregulator-sd { compatible = "regulator-fixed"; pinctrl-names = "default"; @@ -211,6 +222,20 @@ <3300000 0x1>; }; + vdd_sd_dv: gpio-regulator-TLV71033 { + compatible = "regulator-gpio"; + pinctrl-names = "default"; + pinctrl-0 = <&vdd_sd_dv_pins_default>; + regulator-name = "tlv71033"; + regulator-min-microvolt = 
<1800000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + vin-supply = <&vsys_5v0>; + gpios = <&main_gpio0 118 GPIO_ACTIVE_HIGH>; + states = <1800000 0x0>, + <3300000 0x1>; + }; + transceiver1: can-phy1 { compatible = "ti,tcan1042"; #phy-cells = <0>; @@ -613,6 +638,12 @@ >; }; + vdd_sd_dv_pins_default: vdd-sd-dv-default-pins { + pinctrl-single,pins = < + J721E_IOPAD(0x1dc, PIN_OUTPUT, 7) /* (Y1) SPI1_CLK.GPIO0_118 */ + >; + }; + wkup_uart0_pins_default: wkup-uart0-default-pins { pinctrl-single,pins = < J721E_WKUP_IOPAD(0xa0, PIN_INPUT, 0) /* (J29) WKUP_UART0_RXD */ diff --git a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts index 2127316f36a34b..0bf2e182166244 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts @@ -843,6 +843,10 @@ ; }; +&serdes_wiz0 { + status = "okay"; +}; + &serdes0 { status = "okay"; serdes0_usb_link: phy@0 { @@ -854,6 +858,10 @@ }; }; +&serdes_wiz1 { + status = "okay"; +}; + &serdes1 { status = "okay"; serdes1_pcie_link: phy@0 { diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index 6850f50530f12b..beda9e40e931b4 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -32,6 +32,8 @@ assigned-clocks = <&k3_clks 279 1>; assigned-clock-parents = <&k3_clks 279 5>; + status = "disabled"; + serdes0: serdes@f000000 { compatible = "ti,j721e-serdes-10g"; reg = <0x0f000000 0x00010000>; @@ -70,6 +72,8 @@ assigned-clocks = <&k3_clks 280 1>; assigned-clock-parents = <&k3_clks 280 5>; + status = "disabled"; + serdes1: serdes@f010000 { compatible = "ti,j721e-serdes-10g"; reg = <0x0f010000 0x00010000>; diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi index 1944616ab3579a..1fc0a11c5ab4a9 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi +++ 
b/arch/arm64/boot/dts/ti/k3-j784s4-j742s2-main-common.dtsi @@ -77,7 +77,7 @@ serdes_ln_ctrl: mux-controller@4080 { compatible = "reg-mux"; - reg = <0x00004080 0x30>; + reg = <0x00004080 0x50>; #mux-control-cells = <1>; mux-reg-masks = <0x0 0x3>, <0x4 0x3>, /* SERDES0 lane0/1 select */ <0x8 0x3>, <0xc 0x3>, /* SERDES0 lane2/3 select */ diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5bb8f09422a221..8e5d4dbd74e50d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1587,6 +1587,9 @@ CONFIG_PHY_HISTB_COMBPHY=y CONFIG_PHY_HISI_INNO_USB2=y CONFIG_PHY_MVEBU_CP110_COMPHY=y CONFIG_PHY_MTK_TPHY=y +CONFIG_PHY_MTK_HDMI=m +CONFIG_PHY_MTK_MIPI_DSI=m +CONFIG_PHY_MTK_DP=m CONFIG_PHY_QCOM_EDP=m CONFIG_PHY_QCOM_PCIE2=m CONFIG_PHY_QCOM_QMP=m @@ -1729,12 +1732,12 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_SECURITY=y CONFIG_CRYPTO_USER=y +CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index d1cc0571798bfa..dffff6763812f9 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,7 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D @@ -168,6 +169,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_X1C 
MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index ebceaae3c749b8..d40e427ddad94a 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -52,7 +52,7 @@ mrs x0, id_aa64mmfr1_el1 ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ - mov_q x0, HCRX_HOST_FLAGS + mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) /* Enable GCS if supported */ mrs_s x1, SYS_ID_AA64PFR1_EL1 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d1b1a33f9a8b0d..71f0cbf7b28872 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,15 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_ADDRSZ (0x00) + +/* + * Annoyingly, the negative levels for Address size faults aren't laid out + * contiguously (or in the desired order) + */ +#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C) +#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? 
ESR_ELx_FSC_ADDRSZ_nL(n) : \ + (ESR_ELx_FSC_ADDRSZ + (n))) /* Status codes for individual page table levels */ #define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) @@ -161,8 +170,6 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ -#define ESR_ELx_FSC_ADDRSZ (0x00) -#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -371,12 +378,14 @@ /* * ISS values for SME traps */ +#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0) +#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK) -#define ESR_ELx_SME_ISS_SME_DISABLED 0 -#define ESR_ELx_SME_ISS_ILL 1 -#define ESR_ELx_SME_ISS_SM_DISABLED 2 -#define ESR_ELx_SME_ISS_ZA_DISABLED 3 -#define ESR_ELx_SME_ISS_ZT_DISABLED 4 +#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_SMTC_ILL 1 +#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4 /* ISS field definitions for MOPS exceptions */ #define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) @@ -464,6 +473,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) +{ + esr &= ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(2)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(1)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(0)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); +} + +static inline bool esr_fsc_is_sea_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || + (esr == ESR_ELx_FSC_SEA_TTW(2)) || + (esr == ESR_ELx_FSC_SEA_TTW(1)) || + (esr == ESR_ELx_FSC_SEA_TTW(0)) || + (esr == ESR_ELx_FSC_SEA_TTW(-1)); +} + +static inline bool esr_fsc_is_secc_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == 
ESR_ELx_FSC_SECC_TTW(3)) || + (esr == ESR_ELx_FSC_SECC_TTW(2)) || + (esr == ESR_ELx_FSC_SECC_TTW(1)) || + (esr == ESR_ELx_FSC_SECC_TTW(0)) || + (esr == ESR_ELx_FSC_SECC_TTW(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39577f1d079a98..18c7811774d301 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -706,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, } #endif u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type); u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 974d72b5905b86..e9c8a581e16f44 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -100,9 +100,8 @@ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) -#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) +#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO) -#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM) #define MPAMHCR_HOST_FLAGS 0 /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d7cf66573acaff..bd020fc28aa9ca 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { - return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; + u64 hpfar = 
vcpu->arch.fault.hpfar_el2; + + if (unlikely(!(hpfar & HPFAR_EL2_NS))) + return INVALID_GPA; + + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a624..08ba91e6fb035a 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 87e10d9a635b55..9398ade632aaf9 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -14,7 +14,7 @@ * Was this synchronous external abort a RAS notification? * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. */ -static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) +static inline int kvm_handle_guest_sea(void) { /* apei_claim_sea(NULL) expects to mask interrupts itself */ lockdep_assert_irqs_enabled(); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994ba3..6e8aa8e726015e 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. 
- */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h index 5b80785324b6c3..9ea0a74e589273 100644 --- a/arch/arm64/include/asm/rqspinlock.h +++ b/arch/arm64/include/asm/rqspinlock.h @@ -86,7 +86,7 @@ #endif -#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) +#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1) #include diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f1524cdeacf1c4..8fef1262609011 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -97,6 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void); enum mitigation_state arm64_get_spectre_bhb_state(void); bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +extern bool __nospectre_bhb; +u8 get_spectre_bhb_loop_value(void); +bool is_spectre_bhb_fw_mitigated(void); void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr); diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 92a2b59a9f3df4..3322c7047d84fe 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -99,6 +99,19 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } +#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + const struct vdso_time_data *ret = &vdso_u_time_data; + + /* Work around invalid absolute relocations */ + OPTIMIZER_HIDE_VAR(ret); + + return ret; +} +#define __arch_get_vdso_u_time_data 
__arch_get_vdso_u_time_data +#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */ + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f7057502c..6b0ad5070d3e00 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9c4d6d552b25cb..4c46d80aa64b7c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -114,7 +114,14 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NC DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); -bool arm64_use_ng_mappings = false; +/* + * arm64_use_ng_mappings must be placed in the .data section, otherwise it + * ends up in the .bss section where it is initialized in early_map_kernel() + * after the MMU (with the idmap) was enabled. create_init_idmap() - which + * runs before early_map_kernel() and reads the variable via PTE_MAYBE_NG - + * may end up generating an incorrect idmap page table attributes. 
+ */ +bool arm64_use_ng_mappings __read_mostly = false; EXPORT_SYMBOL(arm64_use_ng_mappings); DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 8370d55f035334..0e649d0e59b06e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -359,9 +359,6 @@ static void task_fpsimd_load(void) WARN_ON(preemptible()); WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE)); - if (system_supports_fpmr()) - write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); - if (system_supports_sve() || system_supports_sme()) { switch (current->thread.fp_type) { case FP_STATE_FPSIMD: @@ -413,6 +410,9 @@ static void task_fpsimd_load(void) restore_ffr = system_supports_fa64(); } + if (system_supports_fpmr()) + write_sysreg_s(current->thread.uw.fpmr, SYS_FPMR); + if (restore_sve_regs) { WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE); sve_load_state(sve_pffr(¤t->thread), @@ -651,7 +651,7 @@ static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, * task->thread.uw.fpsimd_state must be up to date before calling this * function. */ -static void fpsimd_to_sve(struct task_struct *task) +static inline void fpsimd_to_sve(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; @@ -675,7 +675,7 @@ static void fpsimd_to_sve(struct task_struct *task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. */ -static void sve_to_fpsimd(struct task_struct *task) +static inline void sve_to_fpsimd(struct task_struct *task) { unsigned int vq, vl; void const *sst = task->thread.sve_state; @@ -1436,7 +1436,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) * If this not a trap due to SME being disabled then something * is being used in the wrong mode, report as SIGILL. 
*/ - if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) { + if (ESR_ELx_SME_ISS_SMTC(esr) != ESR_ELx_SME_ISS_SMTC_SME_DISABLED) { force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0); return; } @@ -1460,6 +1460,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) sme_set_vq(vq_minus_one); fpsimd_bind_task_to_cpu(); + } else { + fpsimd_flush_task_state(current); } put_cpu_fpsimd_context(); @@ -1573,8 +1575,8 @@ void fpsimd_thread_switch(struct task_struct *next) fpsimd_save_user_state(); if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) { - fpsimd_load_kernel_state(next); fpsimd_flush_cpu_state(); + fpsimd_load_kernel_state(next); } else { /* * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's @@ -1661,6 +1663,9 @@ void fpsimd_flush_thread(void) current->thread.svcr = 0; } + if (system_supports_fpmr()) + current->thread.uw.fpmr = 0; + current->thread.fp_type = FP_STATE_FPSIMD; put_cpu_fpsimd_context(); @@ -1801,7 +1806,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) fpsimd_to_sve(current); task_fpsimd_load(); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f27904..2004b4f41ade68 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c 
b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b51..c6650cfe706c34 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Eratum 27456 detection, which makes it + * impossible to use non-global mappings. + */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index b198dde79e591d..edf1783ffc8174 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -879,16 +879,19 @@ static u8 spectre_bhb_loop_affected(void) static const struct midr_range spectre_bhb_k132_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + {}, }; static const struct midr_range spectre_bhb_k38_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + {}, }; static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), 
MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), @@ -997,6 +1000,11 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, return true; } +u8 get_spectre_bhb_loop_value(void) +{ + return max_bhb_k; +} + static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) { const char *v = arm64_get_bp_hardening_vector(slot); @@ -1014,7 +1022,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } -static bool __read_mostly __nospectre_bhb; +bool __read_mostly __nospectre_bhb; static int __init parse_spectre_bhb_param(char *str) { __nospectre_bhb = true; @@ -1092,6 +1100,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) update_mitigation_state(&spectre_bhb_state, state); } +bool is_spectre_bhb_fw_mitigated(void) +{ + return test_bit(BHB_FW, &system_bhb_mitigations); +} + /* Patched to NOP when enabled */ void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95feef9..19ca57def6292b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 17df94570f03a5..fc573fc767b0e7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -12,6 +12,16 @@ #include #include +static inline bool __fault_safe_to_translate(u64 esr) +{ + u64 fsc = esr & ESR_ELx_FSC; + + if (esr_fsc_is_sea_ttw(esr) || 
esr_fsc_is_secc_ttw(esr)) + return false; + + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); +} + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { int ret; @@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +/* + * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR. + */ +static inline bool __hpfar_valid(u64 esr) { - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 + * CPUs affected by ARM erratum #834220 may incorrectly report a + * stage-2 translation fault when a stage-1 permission fault occurs. * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. + * Re-walk the page tables to determine if a stage-1 fault actually + * occurred. 
*/ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - esr_fsc_is_permission_fault(esr))) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { + if (cpus_have_final_cap(ARM64_WORKAROUND_834220) && + esr_fsc_is_translation_fault(esr)) + return false; + + if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr)) + return true; + + if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr)) + return true; + + return esr_fsc_is_addr_sz_fault(esr); +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar; + + fault->far_el2 = read_sysreg_el2(SYS_FAR); + fault->hpfar_el2 = 0; + + if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - } + else if (unlikely(!__fault_safe_to_translate(esr))) + return true; + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) + return false; - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; + /* + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid + * HPFAR value. + */ + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; return true; } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index b741ea6aefa58f..96f625dc725669 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -235,6 +235,8 @@ static inline void __deactivate_traps_mpam(void) static inline void __activate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); @@ -245,11 +247,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. 
*/ if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - write_sysreg(0, pmselr_el0); - hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -269,6 +268,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2); write_sysreg_s(hcrx, SYS_HCRX_EL2); } @@ -278,19 +278,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); write_sysreg(0, hstr_el2); if (system_supports_pmuv3()) { - struct kvm_cpu_context *hctxt; - - hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_HAS_HCX)) - write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2); + write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2); __deactivate_traps_hfgxtr(vcpu); __deactivate_traps_mpam(); diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index e433dfab882aa5..3369dd0c4009f8 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + 
!smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto out_handled; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f34f11c720d707..e80f3ebd3e2a26 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -503,7 +503,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { int ret; - if (!addr_is_memory(addr)) + if (!range_is_memory(addr, addr + size)) return -EPERM; ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, @@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + + /* + * Yikes, we couldn't resolve the fault IPA. This should reinject an + * abort into the host when we figure out how to do that. + */ + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); + addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; + ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index ed363aa3027e59..50aa8dbcae75b0 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void) /* * To check whether we have a MMIO-based (GICv2 compatible) * CPU interface, we need to disable the system register - * view. To do that safely, we have to prevent any interrupt - * from firing (which would be deadly). + * view. * - * Note that this only makes sense on VHE, as interrupts are - * already masked for nVHE as part of the exception entry to - * EL2. - */ - if (has_vhe()) - flags = local_daif_save(); - - /* * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates * that to be able to set ICC_SRE_EL1.SRE to 0, all the * interrupt overrides must be set. You've got to love this. 
+ * + * As we always run VHE with HCR_xMO set, no extra xMO + * manipulation is required in that case. + * + * To safely disable SRE, we have to prevent any interrupt + * from firing (which would be deadly). This only makes sense + * on VHE, as interrupts are already masked for nVHE as part + * of the exception entry to EL2. */ - sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); - isb(); + if (has_vhe()) { + flags = local_daif_save(); + } else { + sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO); + isb(); + } + write_gicreg(0, ICC_SRE_EL1); isb(); @@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void) write_gicreg(sre, ICC_SRE_EL1); isb(); - sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); - isb(); - if (has_vhe()) + if (has_vhe()) { local_daif_restore(flags); + } else { + sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0); + isb(); + } val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63); val |= read_gicreg(ICH_VTR_EL2); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2feb6c6b63af6c..eeda92330ade70 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1501,6 +1501,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } + if (!is_protected_kvm_enabled()) + memcache = &vcpu->arch.mmu_page_cache; + else + memcache = &vcpu->arch.pkvm_memcache; + /* * Permission faults just need to update the existing leaf entry, * and so normally don't require allocations from the memcache. 
The @@ -1510,13 +1515,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (!fault_is_perm || (logging_active && write_fault)) { int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu); - if (!is_protected_kvm_enabled()) { - memcache = &vcpu->arch.mmu_page_cache; + if (!is_protected_kvm_enabled()) ret = kvm_mmu_topup_memory_cache(memcache, min_pages); - } else { - memcache = &vcpu->arch.pkvm_memcache; + else ret = topup_hyp_memcache(memcache, min_pages); - } + if (ret) return ret; } @@ -1794,9 +1797,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; + /* Synchronous External Abort? */ + if (kvm_vcpu_abt_issea(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (kvm_handle_guest_sea()) + kvm_inject_vabt(vcpu); + + return 1; + } + esr = kvm_vcpu_get_esr(vcpu); + /* + * The fault IPA should be reliable at this point as we're not dealing + * with an SEA. + */ ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) + return -EFAULT; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1818,18 +1840,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } } - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. 
- */ - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - kvm_inject_vabt(vcpu); - - return 1; - } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 005ad28f730681..5dde9285afc809 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1945,6 +1945,12 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, if ((hw_val & mpam_mask) == (user_val & mpam_mask)) user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + /* Fail the guest's request to disable the AA64 ISA at EL{0,1,2} */ + if (!FIELD_GET(ID_AA64PFR0_EL1_EL0, user_val) || + !FIELD_GET(ID_AA64PFR0_EL1_EL1, user_val) || + (vcpu_has_nv(vcpu) && !FIELD_GET(ID_AA64PFR0_EL1_EL2, user_val))) + return -EINVAL; + return set_id_reg(vcpu, rd, user_val); } diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 9bef696e2230be..4e298baddc2e56 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -5,6 +5,7 @@ * * Copyright (C) 2014-2016 Zi Shen Lim */ +#include #include #include #include @@ -1500,43 +1501,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } -u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type) { - u32 opt; - u32 insn; - switch (type) { case AARCH64_INSN_MB_SY: - opt = 0xf; - break; + return 0xf; case AARCH64_INSN_MB_ST: - opt = 0xe; - break; + return 0xe; case AARCH64_INSN_MB_LD: - opt = 0xd; - break; + return 0xd; case AARCH64_INSN_MB_ISH: - opt = 0xb; - break; + return 0xb; case AARCH64_INSN_MB_ISHST: - opt = 0xa; - break; + return 0xa; case AARCH64_INSN_MB_ISHLD: - opt = 0x9; - break; + return 0x9; case AARCH64_INSN_MB_NSH: - opt = 0x7; - break; + return 0x7; case AARCH64_INSN_MB_NSHST: - opt = 0x6; - break; + return 0x6; case AARCH64_INSN_MB_NSHLD: - opt = 0x5; - break; + return 0x5; 
default: - pr_err("%s: unknown dmb type %d\n", __func__, type); + pr_err("%s: unknown barrier type %d\n", __func__, type); return AARCH64_BREAK_FAULT; } +} + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; insn = aarch64_insn_get_dmb_value(); insn &= ~GENMASK(11, 8); @@ -1545,6 +1544,21 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) return insn; } +u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type) +{ + u32 opt, insn; + + opt = __get_barrier_crm_val(type); + if (opt == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_dsb_base_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} + u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result, enum aarch64_insn_system_register sysreg) { diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 70d7c89d3ac907..634d78422adb27 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -939,7 +941,51 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +/* Clobbers BPF registers 1-4, aka x0-x3 */ +static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) +{ + const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */ + u8 k = get_spectre_bhb_loop_value(); + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + cpu_mitigations_off() || __nospectre_bhb || + arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) + return; + + if (capable(CAP_SYS_ADMIN)) + return; + + if (supports_clearbhb(SCOPE_SYSTEM)) { + emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx); + return; + } + + if (k) { + emit_a64_mov_i64(r1, k, 
ctx); + emit(A64_B(1), ctx); + emit(A64_SUBS_I(true, r1, r1, 1), ctx); + emit(A64_B_(A64_COND_NE, -2), ctx); + emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx); + emit(aarch64_insn_get_isb_value(), ctx); + } + + if (is_spectre_bhb_fw_mitigated()) { + emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR, + ARM_SMCCC_ARCH_WORKAROUND_3), ctx); + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + emit(aarch64_insn_get_hvc_value(), ctx); + break; + case SMCCC_CONDUIT_SMC: + emit(aarch64_insn_get_smc_value(), ctx); + break; + default: + pr_err_once("Firmware mitigation enabled with unknown conduit\n"); + } + } +} + +static void build_epilogue(struct jit_ctx *ctx, bool was_classic) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 ptr = bpf2a64[TCCNT_PTR]; @@ -952,10 +998,13 @@ static void build_epilogue(struct jit_ctx *ctx) emit(A64_POP(A64_ZR, ptr, A64_SP), ctx); + if (was_classic) + build_bhb_mitigation(ctx); + /* Restore FP/LR registers */ emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); - /* Set return value */ + /* Move the return value from bpf:r0 (aka x7) to x0 */ emit(A64_MOV(1, A64_R(0), r0), ctx); /* Authenticate lr */ @@ -1898,7 +1947,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1961,7 +2010,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, was_classic); build_plt(&ctx); /* Extra pass to validate JITed code. 
*/ diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index f9476848a2edfa..bdf044c5d11b6d 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3536,3 +3536,10 @@ Field 5 F Field 4 P Field 3:0 Align EndSysreg + +Sysreg HPFAR_EL2 3 4 6 0 4 +Field 63 NS +Res0 62:48 +Field 47:4 FIPA +Res0 3:0 +EndSysreg diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 9d01361696a145..ae551b8571374f 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -83,7 +83,26 @@ HYPERCALL3(vcpu_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); -HYPERCALL3(dm_op); + +SYM_FUNC_START(HYPERVISOR_dm_op) + mov x16, #__HYPERVISOR_dm_op; \ + /* + * dm_op hypercalls are issued by the userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN disabled). + */ + uaccess_ttbr0_enable x6, x7, x8 + hvc XEN_IMM + + /* + * Disable userspace access from kernel once the hyp call completed. 
+ */ + uaccess_ttbr0_disable x6, x7 + ret +SYM_FUNC_END(HYPERVISOR_dm_op); SYM_FUNC_START(privcmd_call) mov x16, x0 diff --git a/arch/hexagon/configs/comet_defconfig b/arch/hexagon/configs/comet_defconfig index 469c025297c699..c6108f00028876 100644 --- a/arch/hexagon/configs/comet_defconfig +++ b/arch/hexagon/configs/comet_defconfig @@ -72,9 +72,6 @@ CONFIG_INET=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y -CONFIG_CRC16=y -CONFIG_CRC_T10DIF=y CONFIG_FRAME_WARN=0 CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 067c0b994648cc..1a2cf012b8f2f5 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -73,6 +73,7 @@ config LOONGARCH select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_BPF_JIT diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 3177674228f896..45514f314664d8 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,22 +22,29 @@ struct sigcontext; #define kernel_fpu_available() cpu_has_fpu -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); - -extern void _init_fpu(unsigned int); -extern void _save_fp(struct loongarch_fpu *); -extern void _restore_fp(struct loongarch_fpu *); - -extern void _save_lsx(struct loongarch_fpu *fpu); -extern void _restore_lsx(struct loongarch_fpu *fpu); -extern void _init_lsx_upper(void); -extern void _restore_lsx_upper(struct loongarch_fpu *fpu); - -extern void _save_lasx(struct loongarch_fpu *fpu); -extern void _restore_lasx(struct loongarch_fpu *fpu); -extern void _init_lasx_upper(void); -extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +asmlinkage void _init_fpu(unsigned int); +asmlinkage 
void _save_fp(struct loongarch_fpu *); +asmlinkage void _restore_fp(struct loongarch_fpu *); +asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +asmlinkage void _save_lsx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lsx(struct loongarch_fpu *fpu); +asmlinkage void _init_lsx_upper(void); +asmlinkage void _restore_lsx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); + +asmlinkage void _save_lasx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lasx(struct loongarch_fpu *fpu); +asmlinkage void _init_lasx_upper(void); +asmlinkage void _restore_lasx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); static inline void enable_lsx(void); static inline void disable_lsx(void); diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h index e671978bf5523f..38566574e56214 100644 --- a/arch/loongarch/include/asm/lbt.h +++ b/arch/loongarch/include/asm/lbt.h @@ -12,9 +12,13 @@ #include #include -extern void _init_lbt(void); -extern void _save_lbt(struct loongarch_lbt *); -extern void _restore_lbt(struct loongarch_lbt *); +asmlinkage void _init_lbt(void); +asmlinkage void _save_lbt(struct loongarch_lbt *); +asmlinkage void _restore_lbt(struct loongarch_lbt *); +asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _save_ftop_context(void __user *ftop); +asmlinkage int _restore_ftop_context(void __user *ftop); static inline int is_lbt_enabled(void) { diff 
--git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index f3ddaed9ef7f08..e5d21e836d993c 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -33,9 +33,9 @@ struct pt_regs { unsigned long __last[]; } __aligned(8); -static inline int regs_irqs_disabled(struct pt_regs *regs) +static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) { - return arch_irqs_disabled_flags(regs->csr_prmd); + return !(regs->csr_prmd & CSR_PRMD_PIE); } static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) @@ -55,7 +55,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long v /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h index 99a0d198927f8b..025fc3f0a1028d 100644 --- a/arch/loongarch/include/asm/uprobes.h +++ b/arch/loongarch/include/asm/uprobes.h @@ -15,7 +15,6 @@ typedef u32 uprobe_opcode_t; #define UPROBE_XOLBP_INSN __emit_break(BRK_UPROBE_XOLBP) struct arch_uprobe { - unsigned long resume_era; u32 insn[2]; u32 ixol[2]; bool simulate; diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4853e8b04c6fbe..f9dcaa60033d95 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -21,10 +21,10 @@ obj-$(CONFIG_CPU_HAS_LBT) += lbt.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o -CFLAGS_module.o += $(call cc-option,-Wno-override-init,) -CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) -CFLAGS_traps.o += $(call cc-option,-Wno-override-init,) -CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) +CFLAGS_module.o += $(call cc-disable-warning, 
override-init) +CFLAGS_syscall.o += $(call cc-disable-warning, override-init) +CFLAGS_traps.o += $(call cc-disable-warning, override-init) +CFLAGS_perf_event.o += $(call cc-disable-warning, override-init) ifdef CONFIG_FUNCTION_TRACER ifndef CONFIG_DYNAMIC_FTRACE diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 6ab640101457cc..28caf416ae36e6 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -458,6 +458,7 @@ SYM_FUNC_START(_save_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_fp_context) +EXPORT_SYMBOL_GPL(_save_fp_context) /* * a0: fpregs @@ -471,6 +472,7 @@ SYM_FUNC_START(_restore_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_fp_context) +EXPORT_SYMBOL_GPL(_restore_fp_context) /* * a0: fpregs @@ -484,6 +486,7 @@ SYM_FUNC_START(_save_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lsx_context) +EXPORT_SYMBOL_GPL(_save_lsx_context) /* * a0: fpregs @@ -497,6 +500,7 @@ SYM_FUNC_START(_restore_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lsx_context) +EXPORT_SYMBOL_GPL(_restore_lsx_context) /* * a0: fpregs @@ -510,6 +514,7 @@ SYM_FUNC_START(_save_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lasx_context) +EXPORT_SYMBOL_GPL(_save_lasx_context) /* * a0: fpregs @@ -523,6 +528,7 @@ SYM_FUNC_START(_restore_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lasx_context) +EXPORT_SYMBOL_GPL(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 4f09121417818d..733a7665e434dc 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -16,6 +16,7 @@ #include #include + .section .cpuidle.text, "ax" .align 5 SYM_FUNC_START(__arch_cpu_idle) /* start of idle interrupt region */ @@ -31,14 +32,16 @@ SYM_FUNC_START(__arch_cpu_idle) */ idle 0 /* end of idle interrupt region */ -1: jr ra +idle_exit: + jr ra SYM_FUNC_END(__arch_cpu_idle) + 
.previous SYM_CODE_START(handle_vint) UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL - la_abs t1, 1b + la_abs t1, idle_exit LONG_L t0, sp, PT_ERA /* 3 instructions idle interrupt region */ ori t0, t0, 0b1100 diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index ec5b28e570c963..4c476904227f95 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -18,11 +18,28 @@ static unsigned int euen_mask = CSR_EUEN_FPEN; static DEFINE_PER_CPU(bool, in_kernel_fpu); static DEFINE_PER_CPU(unsigned int, euen_current); +static inline void fpregs_lock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_bh_enable(); +} + void kernel_fpu_begin(void) { unsigned int *euen_curr; - preempt_disable(); + if (!irqs_disabled()) + fpregs_lock(); WARN_ON(this_cpu_read(in_kernel_fpu)); @@ -73,7 +90,8 @@ void kernel_fpu_end(void) this_cpu_write(in_kernel_fpu, false); - preempt_enable(); + if (!irqs_disabled()) + fpregs_unlock(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 001f061d226ab5..71678912d24ce2 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -90,6 +90,7 @@ SYM_FUNC_START(_save_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lbt_context) +EXPORT_SYMBOL_GPL(_save_lbt_context) /* * a0: scr @@ -110,6 +111,7 @@ SYM_FUNC_START(_restore_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lbt_context) +EXPORT_SYMBOL_GPL(_restore_lbt_context) /* * a0: ftop @@ -120,6 +122,7 @@ SYM_FUNC_START(_save_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_ftop_context) +EXPORT_SYMBOL_GPL(_save_ftop_context) /* * a0: ftop @@ -150,6 +153,7 @@ SYM_FUNC_START(_restore_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_ftop_context) 
+EXPORT_SYMBOL_GPL(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7a555b60017193..4740cb5b238898 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -51,27 +51,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -/* Assembly functions to move context to/from the FPU */ -extern asmlinkage int -_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); - -#ifdef CONFIG_CPU_HAS_LBT -extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _save_ftop_context(void __user *ftop); -extern asmlinkage int _restore_ftop_context(void __user *ftop); -#endif - struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index e2d3bfeb636643..bc75a3a69fc8d5 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -111,7 +111,7 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_offset __nosavedata; +static long init_offset; void save_counter(void) { diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 2ec3106c0da3d1..47fc2de6d15018 100644 
--- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -553,9 +553,10 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); #else + bool pie = regs_irqs_disabled(regs); unsigned int *pc; - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); @@ -582,7 +583,7 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) die_if_kernel("Kernel ale access", regs); force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); #endif irqentry_exit(regs, state); @@ -621,12 +622,13 @@ static void bug_handler(struct pt_regs *regs) asmlinkage void noinstr do_bce(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned long era = exception_era(regs); u64 badv = 0, lower = 0, upper = ULONG_MAX; union loongarch_instruction insn; irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); current->thread.trap_nr = read_csr_excode(); @@ -692,7 +694,7 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -710,11 +712,12 @@ asmlinkage void noinstr do_bce(struct pt_regs *regs) asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); + bool pie = regs_irqs_disabled(regs); unsigned int opcode, bcode; unsigned long era = exception_era(regs); irqentry_state_t state = irqentry_enter(regs); - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (__get_inst(&opcode, (u32 *)era, user)) @@ -780,7 +783,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) } out: 
- if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); @@ -1015,6 +1018,7 @@ static void init_restore_lbt(void) asmlinkage void noinstr do_lbt(struct pt_regs *regs) { + bool pie = regs_irqs_disabled(regs); irqentry_state_t state = irqentry_enter(regs); /* @@ -1024,7 +1028,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) * (including the user using 'MOVGR2GCSR' to turn on TM, which * will not trigger the BTE), we need to check PRMD first. */ - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_enable(); if (!cpu_has_lbt) { @@ -1038,7 +1042,7 @@ asmlinkage void noinstr do_lbt(struct pt_regs *regs) preempt_enable(); out: - if (regs->csr_prmd & CSR_PRMD_PIE) + if (!pie) local_irq_disable(); irqentry_exit(regs, state); diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c index 87abc7137b738e..6022eb0f71dbce 100644 --- a/arch/loongarch/kernel/uprobes.c +++ b/arch/loongarch/kernel/uprobes.c @@ -42,7 +42,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; instruction_pointer_set(regs, utask->xol_vaddr); - user_enable_single_step(current); return 0; } @@ -53,13 +52,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); current->thread.trap_nr = utask->autask.saved_trap_nr; - - if (auprobe->simulate) - instruction_pointer_set(regs, auprobe->resume_era); - else - instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); - - user_disable_single_step(current); + instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); return 0; } @@ -70,7 +63,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) current->thread.trap_nr = utask->autask.saved_trap_nr; instruction_pointer_set(regs, utask->vaddr); - user_disable_single_step(current); } bool 
arch_uprobe_xol_was_trapped(struct task_struct *t) @@ -90,7 +82,6 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) insn.word = auprobe->insn[0]; arch_simulate_insn(insn, regs); - auprobe->resume_era = regs->csr_era; return true; } diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index f4c8e35c216a4b..cb41d9265662f4 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -21,4 +21,4 @@ kvm-y += intc/eiointc.o kvm-y += intc/pch_pic.o kvm-y += irqfd.o -CFLAGS_exit.o += $(call cc-option,-Wno-override-init,) +CFLAGS_exit.o += $(call cc-disable-warning, override-init) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 93f4acd445236e..fe734dc062ed47 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -111,7 +111,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { - kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); return ret; } /* Construct the mask by scanning the bit 27-30 */ @@ -127,7 +127,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) - kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); return ret; } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index d165cd38c6bb38..80ea63d465b8ea 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -296,10 +296,10 @@ int kvm_arch_enable_virtualization_cpu(void) /* * Enable virtualization features granting guest direct control of * certain features: - * GCI=2: Trap on init 
or unimplement cache instruction. + * GCI=2: Trap on init or unimplemented cache instruction. * TORU=0: Trap on Root Unimplement. * CACTRL=1: Root control cache. - * TOP=0: Trap on Previlege. + * TOP=0: Trap on Privilege. * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8e427b37966123..5af32ec62cb16a 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -294,6 +294,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { + kvm_lose_pmu(vcpu); /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -902,6 +903,13 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); + + /* + * When vCPU reset, clear the ESTAT and GINTC registers + * Other CSR registers are cleared with function _kvm_setcsr(). + */ + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0); + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0); break; default: ret = -EINVAL; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index e4068906143b33..cea84d7f2b91a1 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return (pte_t *) pmd; + return pmd_none(pmdp_get(pmd)) ? 
NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index fdb7f73ad16016..06f11d9e4ec113 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -65,9 +65,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c index 1e0590542f987c..e7b7346592cb2a 100644 --- a/arch/loongarch/power/hibernate.c +++ b/arch/loongarch/power/hibernate.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ struct pt_regs saved_regs; void save_processor_state(void) { + save_counter(); saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); saved_euen = csr_read32(LOONGARCH_CSR_EUEN); @@ -26,6 +28,7 @@ void save_processor_state(void) void restore_processor_state(void) { + sync_counter(); csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); csr_write32(saved_euen, LOONGARCH_CSR_EUEN); diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 67a0d157122daa..110279a64aa447 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -89,4 +89,3 @@ CONFIG_PANIC_ON_OOPS=y # CONFIG_CRYPTO_ECHAINIV is not set CONFIG_CRYPTO_ANSI_CPRNG=y # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index e324410ef239c0..d26c7f4f8c360a 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -793,7 +793,7 @@ static void __init mac_identify(void) } macintosh_config = mac_data_table; - for (m = macintosh_config; m->ident != -1; m++) { + for (m = &mac_data_table[1]; m->ident != -1; m++) { if (m->ident == model) { macintosh_config = 
m; break; diff --git a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts index c7ea4f1c0bb21f..6c277ab83d4b94 100644 --- a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts +++ b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts @@ -29,6 +29,7 @@ compatible = "loongson,pch-msi-1.0"; reg = <0 0x2ff00000 0 0x8>; interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <64>; diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 8caa03a41327d9..cba0b85c6707cb 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -82,7 +82,6 @@ CONFIG_LEDS_GPIO=y # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_PROC_PAGE_MONITOR is not set -CONFIG_CRC_ITU_T=m CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index fe282630b51cb9..8f7c36868204a3 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -238,7 +238,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 5ab149cd3178e8..114fcd67898d47 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -218,4 +218,3 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 31ca93d3acc59d..f1a8ccf2c45928 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -326,5 +326,4 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m 
CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=m CONFIG_DEBUG_MEMORY_INIT=y diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index b8907b3d7a331f..5d079941fd207e 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -317,4 +317,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m diff --git a/arch/mips/configs/ip30_defconfig b/arch/mips/configs/ip30_defconfig index 270181a7320a4e..a4524e78546947 100644 --- a/arch/mips/configs/ip30_defconfig +++ b/arch/mips/configs/ip30_defconfig @@ -179,4 +179,3 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_T10DIF=m diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 121e7e48fa7713..d8ac11427f69b0 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -177,7 +177,6 @@ CONFIG_CRYPTO_SERPENT=y CONFIG_CRYPTO_TEA=y CONFIG_CRYPTO_TWOFISH=y CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRC_T10DIF=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index 128f9abab7fcc7..e2bcdfd290a1d8 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y -CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 0261969a6e45e2..42b161d587c778 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -155,5 +155,4 @@ CONFIG_JFFS2_COMPRESSION_OPTIONS=y CONFIG_SQUASHFS=y CONFIG_CRYPTO_TEST=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=m CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/mips/configs/rt305x_defconfig 
b/arch/mips/configs/rt305x_defconfig index 8404e0a9d8b22f..8f9701efef1984 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -128,7 +128,6 @@ CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_ZLIB is not set CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m -CONFIG_CRC_ITU_T=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set # CONFIG_XZ_DEC_IA64 is not set diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig index ce855b644bb03d..ae2afff00e01a0 100644 --- a/arch/mips/configs/sb1250_swarm_defconfig +++ b/arch/mips/configs/sb1250_swarm_defconfig @@ -99,4 +99,3 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=m diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index 917967fed45fe6..2a9a9b12847d3a 100644 --- a/arch/mips/configs/vocore2_defconfig +++ b/arch/mips/configs/vocore2_defconfig @@ -111,7 +111,6 @@ CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y -CONFIG_CRC16=y CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 7b91edfe3e0753..aae8497b687285 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -140,7 +140,6 @@ CONFIG_SQUASHFS=y # CONFIG_SQUASHFS_ZLIB is not set CONFIG_SQUASHFS_XZ=y CONFIG_CRYPTO_ARC4=m -CONFIG_CRC_ITU_T=m CONFIG_PRINTK_TIME=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y diff --git a/arch/mips/include/asm/idle.h b/arch/mips/include/asm/idle.h index 0992cad9c632ec..c7d75807d13fe6 100644 --- a/arch/mips/include/asm/idle.h +++ b/arch/mips/include/asm/idle.h @@ -6,11 +6,10 @@ #include extern void (*cpu_wait)(void); -extern void r4k_wait(void); -extern asmlinkage void __r4k_wait(void); +extern asmlinkage void r4k_wait(void); extern void r4k_wait_irqoff(void); -static inline int 
using_rollback_handler(void) +static inline int using_skipover_handler(void) { return cpu_wait == r4k_wait; } diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 85fa9962266a2b..ef72c46b556887 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs, /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET \ + (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index a572ce36a24f21..08c0a01d9a298c 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -104,48 +104,59 @@ handle_vcei: __FINIT - .align 5 /* 32 byte rollback region */ -LEAF(__r4k_wait) - .set push - .set noreorder - /* start of rollback region */ - LONG_L t0, TI_FLAGS($28) - nop - andi t0, _TIF_NEED_RESCHED - bnez t0, 1f - nop - nop - nop -#ifdef CONFIG_CPU_MICROMIPS - nop - nop - nop - nop -#endif + .section .cpuidle.text,"ax" + /* Align to 32 bytes for the maximum idle interrupt region size. */ + .align 5 +LEAF(r4k_wait) + /* Keep the ISA bit clear for calculations on local labels here. */ +0: .fill 0 + /* Start of idle interrupt region. */ + local_irq_enable + /* + * If an interrupt lands here, before going idle on the next + * instruction, we must *NOT* go idle since the interrupt could + * have set TIF_NEED_RESCHED or caused a timer to need resched. + * Fall through -- see skipover_handler below -- and have the + * idle loop take care of things. + */ +1: .fill 0 + /* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up. 
*/ + .if 1b - 0b > 32 + .error "overlong idle interrupt region" + .elseif 1b - 0b > 8 + .align 4 + .endif +2: .fill 0 + .equ r4k_wait_idle_size, 2b - 0b + /* End of idle interrupt region; size has to be a power of 2. */ .set MIPS_ISA_ARCH_LEVEL_RAW +r4k_wait_insn: wait - /* end of rollback region (the region size must be power of two) */ -1: +r4k_wait_exit: + .set mips0 + local_irq_disable jr ra - nop - .set pop - END(__r4k_wait) + END(r4k_wait) + .previous - .macro BUILD_ROLLBACK_PROLOGUE handler - FEXPORT(rollback_\handler) + .macro BUILD_SKIPOVER_PROLOGUE handler + FEXPORT(skipover_\handler) .set push .set noat MFC0 k0, CP0_EPC - PTR_LA k1, __r4k_wait - ori k0, 0x1f /* 32 byte rollback region */ - xori k0, 0x1f + /* Subtract/add 2 to let the ISA bit propagate through the mask. */ + PTR_LA k1, r4k_wait_insn - 2 + ori k0, r4k_wait_idle_size - 2 + .set noreorder bne k0, k1, \handler + PTR_ADDIU k0, r4k_wait_exit - r4k_wait_insn + 2 + .set reorder MTC0 k0, CP0_EPC .set pop .endm .align 5 -BUILD_ROLLBACK_PROLOGUE handle_int +BUILD_SKIPOVER_PROLOGUE handle_int NESTED(handle_int, PT_SIZE, sp) .cfi_signal_frame #ifdef CONFIG_TRACE_IRQFLAGS @@ -265,7 +276,7 @@ NESTED(except_vec_ejtag_debug, 0, sp) * This prototype is copied to ebase + n*IntCtl.VS and patched * to invoke the handler */ -BUILD_ROLLBACK_PROLOGUE except_vec_vi +BUILD_SKIPOVER_PROLOGUE except_vec_vi NESTED(except_vec_vi, 0, sp) SAVE_SOME docfi=1 SAVE_AT docfi=1 diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5abc8b7340f887..80e8a04a642e05 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -35,13 +35,6 @@ static void __cpuidle r3081_wait(void) write_c0_conf(cfg | R30XX_CONF_HALT); } -void __cpuidle r4k_wait(void) -{ - raw_local_irq_enable(); - __r4k_wait(); - raw_local_irq_disable(); -} - /* * This variant is preferable as it allows testing need_resched and going to * sleep depending on the outcome atomically. 
Unfortunately the "It is diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index e85bd087467e8c..cc26d56f3ab61a 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -332,6 +332,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg = kcalloc(nclusters, sizeof(*mips_cps_cluster_bootcfg), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg) + goto err_out; if (nclusters > 1) mips_cm_update_property(); @@ -348,6 +350,8 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) mips_cps_cluster_bootcfg[cl].core_power = kcalloc(BITS_TO_LONGS(ncores), sizeof(unsigned long), GFP_KERNEL); + if (!mips_cps_cluster_bootcfg[cl].core_power) + goto err_out; /* Allocate VPE boot configuration structs */ for (c = 0; c < ncores; c++) { diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 39e248d0ed59ac..8ec1e185b35ce0 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -77,7 +77,7 @@ #include "access-helper.h" extern void check_wait(void); -extern asmlinkage void rollback_handle_int(void); +extern asmlinkage void skipover_handle_int(void); extern asmlinkage void handle_int(void); extern asmlinkage void handle_adel(void); extern asmlinkage void handle_ades(void); @@ -2066,7 +2066,7 @@ void *set_vi_handler(int n, vi_handler_t addr) { extern const u8 except_vec_vi[]; extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; - extern const u8 rollback_except_vec_vi[]; + extern const u8 skipover_except_vec_vi[]; unsigned long handler; unsigned long old_handler = vi_handlers[n]; int srssets = current_cpu_data.srsets; @@ -2095,7 +2095,7 @@ void *set_vi_handler(int n, vi_handler_t addr) change_c0_srsmap(0xf << n*4, 0 << n*4); } - vec_start = using_rollback_handler() ? rollback_except_vec_vi : + vec_start = using_skipover_handler() ? 
skipover_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) ori_offset = except_vec_vi_ori - vec_start + 2; @@ -2426,8 +2426,8 @@ void __init trap_init(void) if (board_be_init) board_be_init(); - set_except_vector(EXCCODE_INT, using_rollback_handler() ? - rollback_handle_int : handle_int); + set_except_vector(EXCCODE_INT, using_skipover_handler() ? + skipover_handle_int : handle_int); set_except_vector(EXCCODE_MOD, handle_tlbm); set_except_vector(EXCCODE_TLBL, handle_tlbl); set_except_vector(EXCCODE_TLBS, handle_tlbs); diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h index 984c331ff5f474..0e60af486ec155 100644 --- a/arch/openrisc/include/asm/cacheflush.h +++ b/arch/openrisc/include/asm/cacheflush.h @@ -23,6 +23,9 @@ */ extern void local_dcache_page_flush(struct page *page); extern void local_icache_page_inv(struct page *page); +extern void local_dcache_range_flush(unsigned long start, unsigned long end); +extern void local_dcache_range_inv(unsigned long start, unsigned long end); +extern void local_icache_range_inv(unsigned long start, unsigned long end); /* * Data cache flushing always happen on the local cpu. Instruction cache @@ -38,6 +41,20 @@ extern void local_icache_page_inv(struct page *page); extern void smp_icache_page_inv(struct page *page); #endif /* CONFIG_SMP */ +/* + * Even if the actual block size is larger than L1_CACHE_BYTES, paddr + * can be incremented by L1_CACHE_BYTES. When paddr is written to the + * invalidate register, the entire cache line encompassing this address + * is invalidated. Each subsequent reference to the same cache line will + * not affect the invalidation process. 
+ */ +#define local_dcache_block_flush(addr) \ + local_dcache_range_flush(addr, addr + L1_CACHE_BYTES) +#define local_dcache_block_inv(addr) \ + local_dcache_range_inv(addr, addr + L1_CACHE_BYTES) +#define local_icache_block_inv(addr) \ + local_icache_range_inv(addr, addr + L1_CACHE_BYTES) + /* * Synchronizes caches. Whenever a cpu writes executable code to memory, this * should be called to make sure the processor sees the newly written code. diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 5e4744153d0ec5..3cfc4cf0b01934 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -15,16 +15,21 @@ #ifndef __ASM_OPENRISC_CPUINFO_H #define __ASM_OPENRISC_CPUINFO_H +#include +#include + +struct cache_desc { + u32 size; + u32 sets; + u32 block_size; + u32 ways; +}; + struct cpuinfo_or1k { u32 clock_frequency; - u32 icache_size; - u32 icache_block_size; - u32 icache_ways; - - u32 dcache_size; - u32 dcache_block_size; - u32 dcache_ways; + struct cache_desc icache; + struct cache_desc dcache; u16 coreid; }; @@ -32,4 +37,9 @@ struct cpuinfo_or1k { extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS]; extern void setup_cpuinfo(void); +/* + * Check if the cache component exists. 
+ */ +extern bool cpu_cache_is_present(const unsigned int cache_type); + #endif /* __ASM_OPENRISC_CPUINFO_H */ diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile index 79129161f3e031..e4c7d9bdd598d2 100644 --- a/arch/openrisc/kernel/Makefile +++ b/arch/openrisc/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := vmlinux.lds obj-y := head.o setup.o or32_ksyms.o process.o dma.o \ traps.o time.o irq.o entry.o ptrace.o signal.o \ - sys_call_table.o unwinder.o + sys_call_table.o unwinder.o cacheinfo.o obj-$(CONFIG_SMP) += smp.o sync-timer.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/openrisc/kernel/cacheinfo.c b/arch/openrisc/kernel/cacheinfo.c new file mode 100644 index 00000000000000..61230545e4ff6f --- /dev/null +++ b/arch/openrisc/kernel/cacheinfo.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OpenRISC cacheinfo support + * + * Based on work done for MIPS and LoongArch. All original copyrights + * apply as per the original source declaration. + * + * OpenRISC implementation: + * Copyright (C) 2025 Sahil Siddiq + */ + +#include +#include +#include +#include + +static inline void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, struct cache_desc *cache, int cpu) +{ + this_leaf->type = type; + this_leaf->level = level; + this_leaf->coherency_line_size = cache->block_size; + this_leaf->number_of_sets = cache->sets; + this_leaf->ways_of_associativity = cache->ways; + this_leaf->size = cache->size; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); +} + +int init_cache_level(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + int leaves = 0, levels = 0; + unsigned long upr = mfspr(SPR_UPR); + unsigned long iccfgr, dccfgr; + + if (!(upr & SPR_UPR_UP)) { + printk(KERN_INFO + "-- no UPR register... 
unable to detect configuration\n"); + return -ENOENT; + } + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + dccfgr = mfspr(SPR_DCCFGR); + cpuinfo->dcache.ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo->dcache.sets = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); + cpuinfo->dcache.block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); + cpuinfo->dcache.size = + cpuinfo->dcache.sets * cpuinfo->dcache.ways * cpuinfo->dcache.block_size; + leaves += 1; + printk(KERN_INFO + "-- dcache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->dcache.size, cpuinfo->dcache.block_size, + cpuinfo->dcache.sets, cpuinfo->dcache.ways); + } else + printk(KERN_INFO "-- dcache disabled\n"); + + if (cpu_cache_is_present(SPR_UPR_ICP)) { + iccfgr = mfspr(SPR_ICCFGR); + cpuinfo->icache.ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo->icache.sets = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); + cpuinfo->icache.block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); + cpuinfo->icache.size = + cpuinfo->icache.sets * cpuinfo->icache.ways * cpuinfo->icache.block_size; + leaves += 1; + printk(KERN_INFO + "-- icache: %d bytes total, %d bytes/line, %d set(s), %d way(s)\n", + cpuinfo->icache.size, cpuinfo->icache.block_size, + cpuinfo->icache.sets, cpuinfo->icache.ways); + } else + printk(KERN_INFO "-- icache disabled\n"); + + if (!leaves) + return -ENOENT; + + levels = 1; + + this_cpu_ci->num_leaves = leaves; + this_cpu_ci->num_levels = levels; + + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + int level = 1; + + if (cpu_cache_is_present(SPR_UPR_DCP)) { + ci_leaf_init(this_leaf, CACHE_TYPE_DATA, level, &cpuinfo->dcache, cpu); + this_leaf->attributes = ((mfspr(SPR_DCCFGR) & SPR_DCCFGR_CWS) >> 8) ? 
+ CACHE_WRITE_BACK : CACHE_WRITE_THROUGH; + this_leaf++; + } + + if (cpu_cache_is_present(SPR_UPR_ICP)) + ci_leaf_init(this_leaf, CACHE_TYPE_INST, level, &cpuinfo->icache, cpu); + + this_cpu_ci->cpu_map_populated = true; + + return 0; +} diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index b3edbb33b621d0..3a7b5baaa45066 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -24,9 +25,6 @@ static int page_set_nocache(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - pte_val(*pte) |= _PAGE_CI; /* @@ -36,8 +34,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ - for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(__pa(addr), __pa(next)); return 0; } @@ -98,21 +95,14 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) void arch_sync_dma_for_device(phys_addr_t addr, size_t size, enum dma_data_direction dir) { - unsigned long cl; - struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()]; - switch (dir) { case DMA_TO_DEVICE: /* Flush the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBFR, cl); + local_dcache_range_flush(addr, addr + size); break; case DMA_FROM_DEVICE: /* Invalidate the dcache for the requested range */ - for (cl = addr; cl < addr + size; - cl += cpuinfo->dcache_block_size) - mtspr(SPR_DCBIR, cl); + local_dcache_range_inv(addr, addr + size); break; default: /* diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index be56eaafc8b957..a9fb9cc6779ebd 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -113,21 +113,6 @@ static void print_cpuinfo(void) 
return; } - if (upr & SPR_UPR_DCP) - printk(KERN_INFO - "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->dcache_size, cpuinfo->dcache_block_size, - cpuinfo->dcache_ways); - else - printk(KERN_INFO "-- dcache disabled\n"); - if (upr & SPR_UPR_ICP) - printk(KERN_INFO - "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo->icache_size, cpuinfo->icache_block_size, - cpuinfo->icache_ways); - else - printk(KERN_INFO "-- icache disabled\n"); - if (upr & SPR_UPR_DMP) printk(KERN_INFO "-- dmmu: %4d entries, %lu way(s)\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), @@ -155,8 +140,6 @@ static void print_cpuinfo(void) void __init setup_cpuinfo(void) { struct device_node *cpu; - unsigned long iccfgr, dccfgr; - unsigned long cache_set_size; int cpu_id = smp_processor_id(); struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id]; @@ -164,20 +147,6 @@ void __init setup_cpuinfo(void) if (!cpu) panic("Couldn't find CPU%d in device tree...\n", cpu_id); - iccfgr = mfspr(SPR_ICCFGR); - cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); - cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); - cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); - cpuinfo->icache_size = - cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size; - - dccfgr = mfspr(SPR_DCCFGR); - cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); - cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); - cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); - cpuinfo->dcache_size = - cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size; - if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo->clock_frequency)) { printk(KERN_WARNING @@ -294,14 +263,14 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int vr, cpucfgr; unsigned int avr; unsigned int version; +#ifdef CONFIG_SMP struct cpuinfo_or1k *cpuinfo = v; + seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); +#endif vr = mfspr(SPR_VR); 
cpucfgr = mfspr(SPR_CPUCFGR); -#ifdef CONFIG_SMP - seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid); -#endif if (vr & SPR_VR_UVRP) { vr = mfspr(SPR_VR2); version = vr & SPR_VR2_VER; @@ -320,14 +289,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV); } seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ); - seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size); - seq_printf(m, "dcache block size\t: %d bytes\n", - cpuinfo->dcache_block_size); - seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways); - seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size); - seq_printf(m, "icache block size\t: %d bytes\n", - cpuinfo->icache_block_size); - seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways); seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n", 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW)); diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index eb43b73f385580..0f265b8e73ec22 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -14,31 +14,70 @@ #include #include #include +#include #include -static __always_inline void cache_loop(struct page *page, const unsigned int reg) +/* + * Check if the cache component exists. 
+ */ +bool cpu_cache_is_present(const unsigned int cache_type) { - unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; - unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); + unsigned long upr = mfspr(SPR_UPR); + unsigned long mask = SPR_UPR_UP | cache_type; + + return !((upr & mask) ^ mask); +} + +static __always_inline void cache_loop(unsigned long paddr, unsigned long end, + const unsigned short reg, const unsigned int cache_type) +{ + if (!cpu_cache_is_present(cache_type)) + return; - while (line < paddr + PAGE_SIZE) { - mtspr(reg, line); - line += L1_CACHE_BYTES; + while (paddr < end) { + mtspr(reg, paddr); + paddr += L1_CACHE_BYTES; } } +static __always_inline void cache_loop_page(struct page *page, const unsigned short reg, + const unsigned int cache_type) +{ + unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; + unsigned long end = paddr + PAGE_SIZE; + + paddr &= ~(L1_CACHE_BYTES - 1); + + cache_loop(paddr, end, reg, cache_type); +} + void local_dcache_page_flush(struct page *page) { - cache_loop(page, SPR_DCBFR); + cache_loop_page(page, SPR_DCBFR, SPR_UPR_DCP); } EXPORT_SYMBOL(local_dcache_page_flush); void local_icache_page_inv(struct page *page) { - cache_loop(page, SPR_ICBIR); + cache_loop_page(page, SPR_ICBIR, SPR_UPR_ICP); } EXPORT_SYMBOL(local_icache_page_inv); +void local_dcache_range_flush(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBFR, SPR_UPR_DCP); +} + +void local_dcache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_DCBIR, SPR_UPR_DCP); +} + +void local_icache_range_inv(unsigned long start, unsigned long end) +{ + cache_loop(start, end, SPR_ICBIR, SPR_UPR_ICP); +} + void update_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte) { @@ -58,4 +97,3 @@ void update_cache(struct vm_area_struct *vma, unsigned long address, sync_icache_dcache(folio_page(folio, nr)); } } - diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 
be1c2eb8bb9455..e4904ca6f0a088 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -35,6 +35,7 @@ #include #include #include +#include int mem_init_done; @@ -176,8 +177,8 @@ void __init paging_init(void) barrier(); /* Invalidate instruction caches after code modification */ - mtspr(SPR_ICBIR, 0x900); - mtspr(SPR_ICBIR, 0xa00); + local_icache_block_inv(0x900); + local_icache_block_inv(0xa00); /* New TLB miss handlers and kernel page tables are in now place. * Make sure that page flags get updated for all pages in TLB by diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index f5fffc24c3bc5b..5b65c98596131a 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -264,8 +264,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 2487765b7be362..ecc9ffcc11cd71 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -292,7 +292,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_DEFLATE=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_STRIP_ASM_SYMS=y diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 34495446e051c2..71829cb7bc812a 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -97,9 +97,19 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0]); - return -1; + int sig = signalcode >> 24; + + if (sig == SIGFPE) { + /* + * Clear floating point trap bit to avoid trapping + * again on 
the first floating-point instruction in + * the userspace signal handler. + */ + regs->fr[0] &= ~(1ULL << 38); + } + force_sig_fault(sig, signalcode & 0xffffff, + (void __user *) regs->iaoq[0]); + return -1; } return signalcode ? -1 : 0; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 1db60fe13802db..3d8dc822282ac8 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -234,10 +234,8 @@ fi # suppress some warnings in recent ld versions nowarn="-z noexecstack" -if ! ld_is_lld; then - if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then - nowarn="$nowarn --no-warn-rwx-segments" - fi +if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then + nowarn="$nowarn --no-warn-rwx-segments" fi platformo=$object/"$platform".o diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 2479ab62d12fa6..98221bda380d0b 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -91,5 +91,4 @@ CONFIG_AFFS_FS=m # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 20891c413149ca..5757625469c4ce 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -85,8 +85,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index 1715ff5474427a..b99caba8724a72 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -73,6 +73,5 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y 
CONFIG_NLS_ISO8859_8=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index e65c0057147f7d..11163052fdbacf 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -80,5 +80,4 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 17714bf0ed40b9..312d39e4242c3f 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -72,5 +72,4 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index 58fae5131fa7fc..ac27f99faab86e 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -75,6 +75,5 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index 6f58ee1edf1fc6..7beb36a41d4589 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -221,8 +221,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_MD5=y diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index e7080497048d5c..0a42072fa23c75 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ 
b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -60,8 +60,6 @@ CONFIG_CRAMFS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=m CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 3a6381aa9fdc69..488d03ae6d6c28 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -132,7 +132,6 @@ CONFIG_ROOT_NFS=y CONFIG_NFSD=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_T10DIF=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_HMAC=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index 0cb24b33c88e9d..e7bd265fae5a4a 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -5,7 +5,6 @@ CONFIG_BROADCOM_PHY=y # CONFIG_CARDBUS is not set CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_ST=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_HMAC=y CONFIG_DS1682=y CONFIG_EEPROM_LEGACY=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 200bb1ecb56044..69ef3dc31c4b65 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -106,7 +106,6 @@ CONFIG_TMPFS=y CONFIG_AFFS_FS=m CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index fb314f75ad4b5e..b799c95480ae6d 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -110,7 +110,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config 
b/arch/powerpc/configs/fsl-emb-nonhw.config index d6d2a458847bbf..2f81bc2d819e15 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -15,7 +15,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_SCHED=y CONFIG_CGROUPS=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_CRC_T10DIF=y CONFIG_CPUSETS=y CONFIG_CRAMFS=y CONFIG_CRYPTO_MD4=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 9215bed532919d..7e58f3e6c9870c 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -231,7 +231,6 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index d77eeb525366a3..cdd99657b71b0f 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -82,7 +82,6 @@ CONFIG_ROOT_NFS=y CONFIG_CIFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index fa707de761bed4..b564f9e33a0dfa 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -125,8 +125,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 83c4710017e949..a815d9e5e3e8cd 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -97,7 +97,6 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y 
-CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA512=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index a0d27c59ea788b..dfbdd5e8e10879 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -38,4 +38,3 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_CRC_CCITT=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index d1c7fd5bf34b31..fa2b3b9c594528 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -107,8 +107,6 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 61993944db40ad..8bbf51b38480ec 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -159,7 +159,6 @@ CONFIG_NFSD=y CONFIG_NFSD_V4=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index e8b3f67bf3f56b..1bc3466bc909ed 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -276,7 +276,6 @@ CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 8b595f67068c21..41c930f74ed41e 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -90,7 +90,6 @@ CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m 
CONFIG_NLS_ISO8859_1=m -CONFIG_CRC_T10DIF=m CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 4c05f4e4d50560..d2e659a2d8cb97 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -207,7 +207,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 2b175ddf82f0bc..0b48d2b776c443 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -148,8 +148,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_LZO=m -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 3086c4a12d6de6..2b71a6dc399e4d 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -278,8 +278,6 @@ CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y # CONFIG_INTEGRITY is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y -CONFIG_CRC_ITU_T=y # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_IA64 is not set # CONFIG_XZ_DEC_ARM is not set diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 7a978d39699119..e415222bd83980 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -75,4 +75,3 @@ CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 5017a697b67bc3..7c714a19221e10 
100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -114,7 +114,6 @@ CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SPINLOCK=y diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6ac621155ec3c8..9d1ab3971694ae 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -160,9 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o -endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 34a5aec4908fba..126bf3b06ab7e2 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -258,10 +258,6 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, break; } } - if (i == hdr->e_shnum) { - pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); - return -ENOEXEC; - } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 9ac3266e496522..a325c1c02f96dc 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) is_via_system_reset = 1; - crash_smp_send_stop(); + if (IS_ENABLED(CONFIG_SMP)) + crash_smp_send_stop(); + else + crash_kexec_prepare(); crash_save_cpu(regs, crashing_cpu); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 311e2112d782ea..9f764bc42b8cc8 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -976,7 +976,7 @@ int __meminit 
radix__vmemmap_create_mapping(unsigned long start, return 0; } - +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) { if (radix_enabled()) @@ -984,6 +984,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap) return false; } +#endif int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, unsigned long addr, unsigned long next) @@ -1120,6 +1121,19 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in pmd_t *pmd; pte_t *pte; + /* + * Make sure we align the start vmemmap addr so that we calculate + * the correct start_pfn in altmap boundary check to decide whether + * we should use altmap or RAM based backing memory allocation. Also + * the address needs to be aligned for set_pte operation. + + * If the start addr is already PMD_SIZE aligned we will try to use + * a pmd mapping. We don't want to be too aggressive here because + * that will cause more allocations in RAM. So only if the namespace + * vmemmap start addr is PMD_SIZE aligned we will use PMD mapping. + */ + + start = ALIGN_DOWN(start, PAGE_SIZE); for (addr = start; addr < end; addr = next) { next = pmd_addr_end(addr, end); @@ -1145,8 +1159,8 @@ int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, in * in altmap block allocation failures, in which case * we fallback to RAM for vmemmap allocation. */ - if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) || - altmap_cross_boundary(altmap, addr, PMD_SIZE))) { + if (!IS_ALIGNED(addr, PMD_SIZE) || (altmap && + altmap_cross_boundary(altmap, addr, PMD_SIZE))) { /* * make sure we don't create altmap mappings * covering things outside the device. 
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d1171..dc6f75d3ac6ef7 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. + */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 3fbe0295ce1418..95d7ba73d43d0d 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,7 +17,7 @@ config PPC_POWERNV select MMU_NOTIFIER select FORCE_SMP select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config OPAL_PRD diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a44d..2ea30b34335415 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return 
-EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index a934c2a262f66a..fa3c2fff082a87 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,7 +23,7 @@ config PPC_PSERIES select FORCE_SMP select SWIOTLB select ARCH_SUPPORTS_PER_VMA_LOCK - select PPC_RADIX_BROADCAST_TLBIE + select PPC_RADIX_BROADCAST_TLBIE if PPC_RADIX_MMU default y config PARAVIRT diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d6ebc19fb99c51..eec333dd2e598c 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -197,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl) static void tce_free_pSeries(struct iommu_table *tbl) { - if (!tbl->it_userspace) + if (tbl->it_userspace) tce_iommu_userspace_view_free(tbl); } diff --git a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi index c18822ec849f35..58cd546392e056 100644 --- a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi @@ -341,7 +341,7 @@ 1024 1024 1024 1024>; snps,priority = <0 1 2 3 4 5 6 7>; snps,dma-masters = <2>; - snps,data-width = <4>; + snps,data-width = <2>; status = "disabled"; }; diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 721ec275ce57e3..231d777d936c2d 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -115,24 +115,19 @@ \old_c .endm -#define _ALTERNATIVE_CFG(old_c, ...) \ - ALTERNATIVE_CFG old_c - -#define _ALTERNATIVE_CFG_2(old_c, ...) \ - ALTERNATIVE_CFG old_c +#define __ALTERNATIVE_CFG(old_c, ...) ALTERNATIVE_CFG old_c +#define __ALTERNATIVE_CFG_2(old_c, ...) 
ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -#define __ALTERNATIVE_CFG(old_c) \ - old_c "\n" +#define __ALTERNATIVE_CFG(old_c, ...) old_c "\n" +#define __ALTERNATIVE_CFG_2(old_c, ...) old_c "\n" -#define _ALTERNATIVE_CFG(old_c, ...) \ - __ALTERNATIVE_CFG(old_c) +#endif /* __ASSEMBLY__ */ -#define _ALTERNATIVE_CFG_2(old_c, ...) \ - __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG(old_c, ...) __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, ...) __ALTERNATIVE_CFG_2(old_c) -#endif /* __ASSEMBLY__ */ #endif /* CONFIG_RISCV_ALTERNATIVE */ /* diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa371..b59ffeb668d6a5 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) flush_dcache_folio(page_folio(page)); } -/* - * RISC-V doesn't have an instruction to flush parts of the instruction cache, - * so instead we just flush the whole thing. - */ -#define flush_icache_range(start, end) flush_icache_all() #define flush_icache_user_page(vma, pg, addr, len) \ do { \ if (vma->vm_flags & VM_EXEC) \ @@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. 
+ */ +#define flush_icache_range flush_icache_range +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + flush_icache_all(); +} + extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; void riscv_init_cbo_blocksizes(void); diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h index 46677daf708bd0..cc11c4544cffd1 100644 --- a/arch/riscv/include/asm/kgdb.h +++ b/arch/riscv/include/asm/kgdb.h @@ -19,16 +19,9 @@ #ifndef __ASSEMBLY__ +void arch_kgdb_breakpoint(void); extern unsigned long kgdb_compiled_break; -static inline void arch_kgdb_breakpoint(void) -{ - asm(".global kgdb_compiled_break\n" - ".option norvc\n" - "kgdb_compiled_break: ebreak\n" - ".option rvc\n"); -} - #endif /* !__ASSEMBLY__ */ #define DBG_REG_ZERO "zero" diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 121fff429dce66..eceabf59ae482a 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -62,8 +62,11 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long *args) { args[0] = regs->orig_a0; - args++; - memcpy(args, &regs->a1, 5 * sizeof(args[0])); + args[1] = regs->a1; + args[2] = regs->a2; + args[3] = regs->a3; + args[4] = regs->a4; + args[5] = regs->a5; } static inline int syscall_get_arch(struct task_struct *task) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8d186bfced451e..f7480c9c6f8d73 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,8 +9,8 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif -CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) -CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_table.o += $(call cc-disable-warning, override-init) +CFLAGS_compat_syscall_table.o += $(call cc-disable-warning, override-init)
ifdef CONFIG_KEXEC_CORE AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 2e0266ae6bd728..9f3db3503dabd6 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -254,6 +254,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) regs->epc = pc; } +noinline void arch_kgdb_breakpoint(void) +{ + asm(".global kgdb_compiled_break\n" + "kgdb_compiled_break: ebreak\n"); +} + void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, char *remcom_out_buffer) { diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index e264e59e596e80..91d0b355ceeff6 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -73,16 +73,17 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx) static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts, unsigned int *gots) { - unsigned int type, i; - - for (i = 0; i < num; i++) { - type = ELF_RISCV_R_TYPE(relas[i].r_info); - if (type == R_RISCV_CALL_PLT) { + for (int i = 0; i < num; i++) { + switch (ELF_R_TYPE(relas[i].r_info)) { + case R_RISCV_CALL_PLT: + case R_RISCV_PLT32: if (!duplicate_rela(relas, i)) (*plts)++; - } else if (type == R_RISCV_GOT_HI20) { + break; + case R_RISCV_GOT_HI20: if (!duplicate_rela(relas, i)) (*gots)++; + break; } } } diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 47d0ebeec93c23..7f6147c18033b2 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -648,7 +648,7 @@ process_accumulated_relocations(struct module *me, kfree(bucket_iter); } - kfree(*relocation_hashtable); + kvfree(*relocation_hashtable); } static int add_relocation_to_accumulate(struct module *me, int type, @@ -752,9 +752,10 @@ initialize_relocation_hashtable(unsigned int num_relocations, hashtable_size <<= should_double_size; - *relocation_hashtable = kmalloc_array(hashtable_size, - 
sizeof(**relocation_hashtable), - GFP_KERNEL); + /* Number of relocations may be large, so kvmalloc it */ + *relocation_hashtable = kvmalloc_array(hashtable_size, + sizeof(**relocation_hashtable), + GFP_KERNEL); if (!*relocation_hashtable) return 0; @@ -859,7 +860,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } j++; - if (j > sechdrs[relsec].sh_size / sizeof(*rel)) + if (j == num_relocations) j = 0; } while (j_idx != j); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77d3..cc15f7ca6cc17b 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. 
- */ - flush_dcache_page(page); } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de7718008..15d8f75902f858 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -275,6 +275,9 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) unsigned long pmm; u8 pmlen; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; @@ -330,6 +333,9 @@ long get_tagged_addr_ctrl(struct task_struct *task) struct thread_info *ti = task_thread_info(task); long ret = 0; + if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SUPM)) + return -EINVAL; + if (is_compat_thread(ti)) return -EINVAL; diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c174544eefc8ee..f7c9a1caa83e62 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -66,6 +66,9 @@ static struct resource bss_res = { .name = "Kernel bss", }; static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; #endif +static int num_standard_resources; +static struct resource *standard_resources; + static int __init add_resource(struct resource *parent, struct resource *res) { @@ -139,7 +142,7 @@ static void __init init_resources(void) struct resource *res = NULL; struct resource *mem_res = NULL; size_t mem_res_sz = 0; - int num_resources = 0, res_idx = 0; + int num_resources = 0, res_idx = 0, non_resv_res = 0; int ret = 0; /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ @@ -193,6 +196,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { res = &mem_res[res_idx--]; + non_resv_res++; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -210,6 +214,9 @@ static void __init init_resources(void) goto error; } + num_standard_resources = non_resv_res; + standard_resources = &mem_res[res_idx + 1]; + /* Clean-up any unused pre-allocated resources */ if (res_idx >= 
0) memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); @@ -221,6 +228,33 @@ static void __init init_resources(void) memblock_free(mem_res, mem_res_sz); } +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; i++) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_range(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "Reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); static void __init parse_dtb(void) { diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8ff8e8b36524b7..9c83848797a78b 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -198,47 +198,57 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); -asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) +enum misaligned_access_type { + MISALIGNED_STORE, + MISALIGNED_LOAD, +}; +static const struct { + const char *type_str; + int (*handler)(struct pt_regs *regs); +} misaligned_handler[] = { + [MISALIGNED_STORE] = { + .type_str = "Oops - store (or AMO) address misaligned", + .handler = handle_misaligned_store, + }, + [MISALIGNED_LOAD] = { + .type_str = "Oops - load address misaligned", + .handler = handle_misaligned_load, + }, +}; + +static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type type) { + irqentry_state_t state; + if (user_mode(regs)) { irqentry_enter_from_user_mode(regs); + local_irq_enable(); + } else { + state = 
irqentry_nmi_enter(regs); + } - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); + if (misaligned_handler[type].handler(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + misaligned_handler[type].type_str); + if (user_mode(regs)) { + local_irq_disable(); irqentry_exit_to_user_mode(regs); } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_load(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); - irqentry_nmi_exit(regs, state); } } -asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (user_mode(regs)) { - irqentry_enter_from_user_mode(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); - - irqentry_exit_to_user_mode(regs); - } else { - irqentry_state_t state = irqentry_nmi_enter(regs); - - if (handle_misaligned_store(regs)) - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); + do_trap_misaligned(regs, MISALIGNED_LOAD); +} - irqentry_nmi_exit(regs, state); - } +asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) +{ + do_trap_misaligned(regs, MISALIGNED_STORE); } + DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_s, diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 4354c87c0376fd..56f06a27d45fb1 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -88,6 +88,13 @@ #define INSN_MATCH_C_FSWSP 0xe002 #define INSN_MASK_C_FSWSP 0xe003 +#define INSN_MATCH_C_LHU 0x8400 +#define INSN_MASK_C_LHU 0xfc43 +#define INSN_MATCH_C_LH 0x8440 +#define INSN_MASK_C_LH 
0xfc43 +#define INSN_MATCH_C_SH 0x8c00 +#define INSN_MASK_C_SH 0xfc43 + #define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) #if defined(CONFIG_64BIT) @@ -268,7 +275,7 @@ static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset, int __ret; \ \ if (user_mode(regs)) { \ - __ret = __get_user(insn, (type __user *) insn_addr); \ + __ret = get_user(insn, (type __user *) insn_addr); \ } else { \ insn = *(type *)insn_addr; \ __ret = 0; \ @@ -431,6 +438,13 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) fp = 1; len = 4; #endif + } else if ((insn & INSN_MASK_C_LHU) == INSN_MATCH_C_LHU) { + len = 2; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LH) == INSN_MATCH_C_LH) { + len = 2; + shift = 8 * (sizeof(ulong) - len); + insn = RVC_RS2S(insn) << SH_RD; } else { regs->epc = epc; return -1; @@ -441,7 +455,7 @@ static int handle_scalar_misaligned_load(struct pt_regs *regs) val.data_u64 = 0; if (user_mode(regs)) { - if (copy_from_user(&val, (u8 __user *)addr, len)) + if (copy_from_user_nofault(&val, (u8 __user *)addr, len)) return -1; } else { memcpy(&val, (u8 *)addr, len); @@ -530,6 +544,9 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) len = 4; val.data_ulong = GET_F32_RS2C(insn, regs); #endif + } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) { + len = 2; + val.data_ulong = GET_RS2S(insn, regs); } else { regs->epc = epc; return -1; @@ -539,7 +556,7 @@ static int handle_scalar_misaligned_store(struct pt_regs *regs) return -EOPNOTSUPP; if (user_mode(regs)) { - if (copy_to_user((u8 __user *)addr, &val, len)) + if (copy_to_user_nofault((u8 __user *)addr, &val, len)) return -1; } else { memcpy((u8 *)addr, &val, len); diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 585d2dcf2dab1c..b8ba13819d05e5 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -439,29 +439,36 @@ static int __init 
check_unaligned_access_all_cpus(void) { int cpu; - if (unaligned_scalar_speed_param == RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN && - !check_unaligned_access_emulated_all_cpus()) { - check_unaligned_access_speed_all_cpus(); - } else { - pr_info("scalar unaligned access speed set to '%s' by command line\n", - speed_str[unaligned_scalar_speed_param]); + if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { + pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param); for_each_online_cpu(cpu) per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; + } else if (!check_unaligned_access_emulated_all_cpus()) { + check_unaligned_access_speed_all_cpus(); + } + + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { + if (!has_vector() && + unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) { + pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n", + speed_str[unaligned_vector_speed_param]); + } else { + pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n", + speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param); + } } if (!has_vector()) unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; - if (unaligned_vector_speed_param == RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN && - !check_vector_unaligned_access_emulated_all_cpus() && - IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); - } else { - pr_info("vector unaligned access speed set to '%s' by command line\n", - speed_str[unaligned_vector_speed_param]); + if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) { for_each_online_cpu(cpu) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; + } else if 
(!check_vector_unaligned_access_emulated_all_cpus() && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); } /* diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 60d684c76c5873..02635bac91f175 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -77,6 +77,8 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(cntx, reset_cntx, sizeof(*cntx)); spin_unlock(&vcpu->arch.reset_cntx_lock); + memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); + kvm_riscv_vcpu_fp_reset(vcpu); kvm_riscv_vcpu_vector_reset(vcpu); diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d1c83a77735e05..0000ecf49b188b 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -143,9 +143,9 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, struct kvm_vcpu *tmp; kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - spin_lock(&vcpu->arch.mp_state_lock); + spin_lock(&tmp->arch.mp_state_lock); WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); - spin_unlock(&vcpu->arch.mp_state_lock); + spin_unlock(&tmp->arch.mp_state_lock); } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index db8161ebb43cbb..99fb986fca6e8d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -332,6 +332,10 @@ config HAVE_MARCH_Z16_FEATURES def_bool n select HAVE_MARCH_Z15_FEATURES +config HAVE_MARCH_Z17_FEATURES + def_bool n + select HAVE_MARCH_Z16_FEATURES + choice prompt "Processor type" default MARCH_Z196 @@ -397,6 +401,14 @@ config MARCH_Z16 Select this to enable optimizations for IBM z16 (3931 and 3932 series). +config MARCH_Z17 + bool "IBM z17" + select HAVE_MARCH_Z17_FEATURES + depends on $(cc-option,-march=z17) + help + Select this to enable optimizations for IBM z17 (9175 and + 9176 series). 
+ endchoice config MARCH_Z10_TUNE @@ -420,6 +432,9 @@ config MARCH_Z15_TUNE config MARCH_Z16_TUNE def_bool TUNE_Z16 || MARCH_Z16 && TUNE_DEFAULT +config MARCH_Z17_TUNE + def_bool TUNE_Z17 || MARCH_Z17 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -464,6 +479,10 @@ config TUNE_Z16 bool "IBM z16" depends on $(cc-option,-mtune=z16) +config TUNE_Z17 + bool "IBM z17" + depends on $(cc-option,-mtune=z17) + endchoice config 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index b06dc53bfed54c..7679bc16b692bd 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -48,6 +48,7 @@ mflags-$(CONFIG_MARCH_Z13) := -march=z13 mflags-$(CONFIG_MARCH_Z14) := -march=z14 mflags-$(CONFIG_MARCH_Z15) := -march=z15 mflags-$(CONFIG_MARCH_Z16) := -march=z16 +mflags-$(CONFIG_MARCH_Z17) := -march=z17 export CC_FLAGS_MARCH := $(mflags-y) @@ -61,6 +62,7 @@ cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 cflags-$(CONFIG_MARCH_Z14_TUNE) += -mtune=z14 cflags-$(CONFIG_MARCH_Z15_TUNE) += -mtune=z15 cflags-$(CONFIG_MARCH_Z16_TUNE) += -mtune=z16 +cflags-$(CONFIG_MARCH_Z17_TUNE) += -mtune=z17 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6f2c9ce1b15484..24b22f6a9e9959 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -38,7 +38,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -92,7 +91,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y CONFIG_SLUB_STATS=y @@ -395,6 +393,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m 
CONFIG_NET_ACT_GACT=m @@ -405,6 +406,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -628,8 +632,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -654,7 +666,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=y CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -724,11 +735,10 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y -CONFIG_HARDENED_USERCOPY=y -CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_LOCKDOWN_LSM=y @@ -741,6 +751,8 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y +CONFIG_FORTIFY_SOURCE=y +CONFIG_HARDENED_USERCOPY=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -756,7 +768,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -801,7 +812,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -812,9 +822,9 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y 
+CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index f18a7d97ac216d..2b8b42d569bc93 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y @@ -86,7 +85,6 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_IOSCHED_BFQ=y CONFIG_BINFMT_MISC=m CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_SLAB_BUCKETS=y # CONFIG_COMPAT_BRK is not set @@ -385,6 +383,9 @@ CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_EMATCH=y CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -395,6 +396,9 @@ CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_VLAN=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GATE=m CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y @@ -618,8 +622,16 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_MEM=m CONFIG_VIRTIO_INPUT=y +CONFIG_VDPA=m +CONFIG_VDPA_SIM=m +CONFIG_VDPA_SIM_NET=m +CONFIG_VDPA_SIM_BLOCK=m +CONFIG_VDPA_USER=m +CONFIG_MLX5_VDPA_NET=m +CONFIG_VP_VDPA=m CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +CONFIG_VHOST_VDPA=m CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -641,7 +653,6 @@ CONFIG_NILFS2_FS=m CONFIG_BCACHEFS_FS=m CONFIG_BCACHEFS_QUOTA=y CONFIG_BCACHEFS_POSIX_ACL=y -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y @@ -711,6 +722,7 @@ CONFIG_NLS_UTF8=m CONFIG_DLM=m CONFIG_UNICODE=y CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y 
CONFIG_ENCRYPTED_KEYS=m CONFIG_KEY_NOTIFICATIONS=y CONFIG_SECURITY=y @@ -742,7 +754,6 @@ CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_ARIA=m CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_DES=m @@ -788,7 +799,6 @@ CONFIG_CRYPTO_SHA3_512_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_CHACHA_S390=m CONFIG_CRYPTO_HMAC_S390=m CONFIG_ZCRYPT=m CONFIG_PKEY=m @@ -799,10 +809,10 @@ CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y +CONFIG_CRYPTO_KRB5=m +CONFIG_CRYPTO_KRB5_SELFTESTS=y CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 853b2326a171bb..8163c1702720bc 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -70,7 +70,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y -# CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set diff --git a/arch/s390/include/asm/march.h b/arch/s390/include/asm/march.h index fd9eef3be44ca4..11a71bd1495496 100644 --- a/arch/s390/include/asm/march.h +++ b/arch/s390/include/asm/march.h @@ -33,6 +33,10 @@ #define MARCH_HAS_Z16_FEATURES 1 #endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES +#define MARCH_HAS_Z17_FEATURES 1 +#endif + #endif /* __DECOMPRESSOR */ #endif /* __ASM_S390_MARCH_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dd291c9ad6a61b..9980c17ba22d95 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -602,7 +602,8 @@ SYM_CODE_START(stack_invalid) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_R8(64,%r11),0(%r14) - stg 
%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + GET_LC %r2 + mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK(%r2) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_invalid diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 33205dd410e470..e657fad7e376f5 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -442,7 +442,7 @@ static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) ctrset_size = 48; else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) ctrset_size = 128; - else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + else if (cpumf_ctr_info.csvn >= 6 && cpumf_ctr_info.csvn <= 8) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -858,18 +858,13 @@ static int cpumf_pmu_event_type(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { unsigned int type = event->attr.type; - int err; + int err = -ENOENT; if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) err = __hw_perf_event_init(event, type); else if (event->pmu->type == type) /* Registered as unknown PMU */ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); - else - return -ENOENT; - - if (unlikely(err) && event->destroy) - event->destroy(event); return err; } @@ -1819,8 +1814,6 @@ static int cfdiag_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; err = cfdiag_event_init2(event); - if (unlikely(err)) - event->destroy(event); out: return err; } diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index e4a6bfc910808a..690a293eb10d63 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -237,7 +237,6 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); 
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); - CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); @@ -365,6 +364,83 @@ CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z17, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z17, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z17, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z17, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z17, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z17, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z17, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z17, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z17, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z17, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z17, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_IV, 0x00a0); 
+CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z17, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z17, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z17, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_SAMETHRD, 0x00ca); +CPUMF_EVENT_ATTR(cf_z17, CYCLES_DIFFTHRD, 0x00cb); +CPUMF_EVENT_ATTR(cf_z17, INST_SAMETHRD, 0x00cc); +CPUMF_EVENT_ATTR(cf_z17, INST_DIFFTHRD, 0x00cd); +CPUMF_EVENT_ATTR(cf_z17, WRONG_BRANCH_PREDICTION, 0x00ce); +CPUMF_EVENT_ATTR(cf_z17, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z17, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z17, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z17, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z17, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z17, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z17, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z17, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPLETIONS, 0x010c); 
+CPUMF_EVENT_ATTR(cf_z17, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z17, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_ONCHIP, 0x0110); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_OFFCHIP, 0x0111); +CPUMF_EVENT_ATTR(cf_z17, NNPA_INST_DIFF, 0x0112); +CPUMF_EVENT_ATTR(cf_z17, NNPA_4K_PREFETCH, 0x0114); +CPUMF_EVENT_ATTR(cf_z17, NNPA_COMPL_LOCK, 0x0115); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK, 0x0116); +CPUMF_EVENT_ATTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO, 0x0117); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), @@ -414,7 +490,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_678_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -779,6 +855,87 @@ static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z17_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z17, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z17, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z17, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z17, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z17, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z17, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, 
DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z17, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z17, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z17, CYCLES_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, CYCLES_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, INST_SAMETHRD), + CPUMF_EVENT_PTR(cf_z17, INST_DIFFTHRD), + CPUMF_EVENT_PTR(cf_z17, WRONG_BRANCH_PREDICTION), + CPUMF_EVENT_PTR(cf_z17, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z17, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z17, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z17, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z17, TX_C_TABORT_SPECIAL), + 
CPUMF_EVENT_PTR(cf_z17, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z17, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z17, SORTL), + CPUMF_EVENT_PTR(cf_z17, DFLT_CC), + CPUMF_EVENT_PTR(cf_z17, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z17, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z17, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_ONCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_OFFCHIP), + CPUMF_EVENT_PTR(cf_z17, NNPA_INST_DIFF), + CPUMF_EVENT_PTR(cf_z17, NNPA_4K_PREFETCH), + CPUMF_EVENT_PTR(cf_z17, NNPA_COMPL_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK), + CPUMF_EVENT_PTR(cf_z17, NNPA_RETRY_LOCK_WITH_PLO), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z17, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -859,7 +1016,7 @@ __init const struct attribute_group **cpumf_cf_event_group(void) if (ci.csvn >= 1 && ci.csvn <= 5) csvn = cpumcf_svn_12345_pmu_event_attr; else if (ci.csvn >= 6) - csvn = cpumcf_svn_67_pmu_event_attr; + csvn = cpumcf_svn_678_pmu_event_attr; /* Determine model-specific counter set(s) */ get_cpu_id(&cpu_id); @@ -892,6 +1049,10 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 0x3932: model = cpumcf_z16_pmu_event_attr; break; + case 0x9175: + case 0x9176: + model = cpumcf_z17_pmu_event_attr; + break; default: model = none; break; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 5f60248cb46873..ad22799d8a7d91 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -885,9 +885,6 @@ static int cpumsf_pmu_event_init(struct perf_event *event) event->attr.exclude_idle = 0; err = __hw_perf_event_init(event); - if (unlikely(err)) - if (event->destroy) - event->destroy(event); return err; } diff --git a/arch/s390/kernel/processor.c 
b/arch/s390/kernel/processor.c index 54e281436a28b9..80b1f7a29f1164 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -294,6 +294,10 @@ static int __init setup_elf_platform(void) case 0x3932: strcpy(elf_platform, "z16"); break; + case 0x9175: + case 0x9176: + strcpy(elf_platform, "z17"); + break; } return 0; } diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9a5d5be8acf41e..d278bf0c09d1b3 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -324,32 +325,87 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u } /** - * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split. + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. * @mm: the mm containing the folio to work on * @folio: the folio - * @split: whether to split a large folio * * Context: Must be called while holding an extra reference to the folio; * the mm lock should not be held. - * Return: 0 if the folio was split successfully; - * -EAGAIN if the folio was not split successfully but another attempt - * can be made, or if @split was set to false; - * -EINVAL in case of other errors. See split_folio(). + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). 
*/ -static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - int rc; + int rc, tried_splits; lockdep_assert_not_held(&mm->mmap_lock); folio_wait_writeback(folio); lru_add_drain_all(); - if (split) { + + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + folio_lock(folio); rc = split_folio(folio); + if (rc != -EBUSY) { + folio_unlock(folio); + return rc; + } + + /* + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. + */ + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; + } + + /* + * Ideally, we'd only trigger writeback on this exact folio. 
But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. + */ + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); folio_unlock(folio); - if (rc != -EBUSY) - return rc; + if (unlikely(!inode)) + break; + + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); } return -EAGAIN; } @@ -393,8 +449,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header folio_walk_end(&fw, vma); mmap_read_unlock(mm); - if (rc == -E2BIG || rc == -EBUSY) - rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } folio_put(folio); return rc; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 610dd44a948b22..a06a000f196ce0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -95,7 +95,7 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy, current->pid, vcpu->kvm); /* do not warn on invalid runtime instrumentation mode */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2811a6c093b8b4..60c360c18690f6 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm) if (!gi->origin) return; gisa_clear_ipm(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin); } void kvm_s390_gisa_init(struct kvm *kvm) @@ -3177,7 +3177,7 @@ void kvm_s390_gisa_init(struct kvm *kvm) 
hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin); } void kvm_s390_gisa_enable(struct kvm *kvm) @@ -3218,7 +3218,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; - VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); + VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa); } void kvm_s390_gisa_disable(struct kvm *kvm) @@ -3467,7 +3467,7 @@ int __init kvm_s390_gib_init(u8 nisc) } } - KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc); goto out; out_unreg_gal: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fff863734975eb..3f3175193fd7a7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1022,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); - VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + VM_EVENT(kvm, 3, "New guest asce: 0x%p", (void *) kvm->arch.gmap->asce); break; } @@ -3466,7 +3466,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_gisa_init(kvm); INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); kvm->arch.pv.set_aside = NULL; - KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -3529,7 +3529,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); kvm_s390_vsie_destroy(kvm); - KVM_EVENT(3, "vm 0x%pK destroyed", kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -3650,7 +3650,7 @@ static int 
sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)", old_sca, kvm->arch.sca); return 0; } @@ -4027,7 +4027,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto out_free_sie_block; } - VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", + VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p", vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block); diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 9ac92dbf680dbb..9e28f165c114ca 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + TP_printk("create cpu %d at 0x%p, sie block at 0x%p", __entry->id, __entry->vcpu, __entry->sie_block) ); @@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %pK)\n", + TP_printk("enabling channel I/O support (kvm @ %p)\n", __entry->kvm) ); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0776dfde2dba9c..945106b5562db0 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -605,17 +605,15 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp, } /* Setup stack and backchain */ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) { - if (is_first_pass(jit) || (jit->seen & SEEN_FUNC)) - /* lgr %w1,%r15 (backchain) */ - EMIT4(0xb9040000, REG_W1, REG_15); + /* lgr %w1,%r15 (backchain) */ + EMIT4(0xb9040000, REG_W1, REG_15); /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED); /* aghi %r15,-STK_OFF */ EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth)); - if (is_first_pass(jit) || 
(jit->seen & SEEN_FUNC)) - /* stg %w1,152(%r15) (backchain) */ - EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, - REG_15, 152); + /* stg %w1,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, + REG_15, 152); } } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 9a929bbcc39722..241f7251c8730f 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -428,6 +428,8 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; } zdev = zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (IS_ERR(zdev)) + return; list_add_tail(&zdev->entry, scan_list); } diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 855f818deb98ef..d5c68ade71ab79 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -53,6 +53,9 @@ static struct facility_def facility_defs[] = { #endif #ifdef CONFIG_HAVE_MARCH_Z15_FEATURES 61, /* miscellaneous-instruction-extension 3 */ +#endif +#ifdef CONFIG_HAVE_MARCH_Z17_FEATURES + 84, /* miscellaneous-instruction-extension 4 */ #endif -1 /* END */ } diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 4464a2ad42ed0f..b6f36c938f1d5c 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -99,4 +99,3 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index ee1b36682155dd..e76694aace2572 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -128,4 +128,3 @@ CONFIG_DEBUG_FS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index 296ed768cbbb7f..ee3f6db7d8da1f 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ 
b/arch/sh/configs/edosk7705_defconfig @@ -33,4 +33,3 @@ CONFIG_CMDLINE_FROM_BOOTLOADER=y # CONFIG_PROC_FS is not set # CONFIG_SYSFS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index 67716a44463eb9..da176f100e004b 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -110,4 +110,3 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 77e3185f63e47a..3582af15ad8609 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -56,5 +56,3 @@ CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC16=y -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/kfr2r09-romimage_defconfig b/arch/sh/configs/kfr2r09-romimage_defconfig index 42bf34181a3e09..88fbb65cb9f9e0 100644 --- a/arch/sh/configs/kfr2r09-romimage_defconfig +++ b/arch/sh/configs/kfr2r09-romimage_defconfig @@ -49,4 +49,3 @@ CONFIG_TMPFS=y # CONFIG_NETWORK_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index d871623955c59b..924bb3233b0bc4 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -111,4 +111,3 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 6a234761bfd777..0307bb2be79f35 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -58,4 +58,3 @@ CONFIG_ROMFS_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_SH_STANDARD_BIOS=y # 
CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 8d443749550ec0..93b9aa32dc7c74 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -86,5 +86,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_KOBJECT=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y -CONFIG_CRC_CCITT=m -CONFIG_CRC16=m diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 2d1e65cad2398a..fc2010c241fbb0 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -90,4 +90,3 @@ CONFIG_DEBUG_FS=y CONFIG_CRYPTO_MANAGER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 6bd6c0ae85d7c9..f28b8c4181c24a 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -105,4 +105,3 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index cde668569cc1dd..3a4239f20ff13b 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -103,4 +103,3 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index c863a11c759207..69568cc4039608 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -87,4 +87,3 @@ CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 7e4f710d46c737..ecb4bdb5bb58fa 100644 
--- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -92,4 +92,3 @@ CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index cd24cf08210e14..9870d16d971103 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -136,4 +136,3 @@ CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 472fdf365cad0e..64f9308ee5865d 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -101,6 +101,3 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y -CONFIG_CRC16=y -CONFIG_CRC_ITU_T=y diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 49a4961889dec0..8770a72e6a6310 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -96,4 +96,3 @@ CONFIG_FRAME_POINTER=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index de293792db8414..b15c6406a0e879 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -122,4 +122,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_FRAME_POINTER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 96521271758c9d..9501e69eb88644 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -128,4 +128,3 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG 
is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 48f38ec236b6b0..4d75c92cac10ed 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -120,6 +120,5 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_DEFLATE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index 1b1174a07e3617..cc6292b3235af9 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -193,5 +193,3 @@ CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=y -CONFIG_CRC16=y diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index 5440bd0ca4ed5b..e6298f22623a9c 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -39,4 +39,3 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 57923c3296cc66..b77b3313157e20 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -112,4 +112,3 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig index 4338af8d02d036..2e2b46980b58bb 100644 --- a/arch/sh/configs/sh7770_generic_defconfig +++ b/arch/sh/configs/sh7770_generic_defconfig @@ -41,4 +41,3 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_SYSFS is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_CRC32 is not set diff --git 
a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 8e85f205d8f57c..f022ada363b5b0 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -264,4 +264,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 01b2bdfbf9a892..f1ba0fefe1f94a 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -229,7 +229,6 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=m CONFIG_VCC=m CONFIG_PATA_CMD64X=y CONFIG_IP_PNP=y diff --git a/arch/um/Makefile b/arch/um/Makefile index 1d36a613aad83d..9ed792e565c917 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -154,5 +154,6 @@ MRPROPER_FILES += $(HOST_DIR)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -o -name '*.gcov' \) -type f -print | xargs rm -f + $(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) clean export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING DEV_NULL_PATH diff --git a/arch/um/include/asm/uaccess.h b/arch/um/include/asm/uaccess.h index 3a08f9029a3f9a..1c6e0ae41b0c5a 100644 --- a/arch/um/include/asm/uaccess.h +++ b/arch/um/include/asm/uaccess.h @@ -55,6 +55,7 @@ do { \ goto err_label; \ } \ *((type *)dst) = get_unaligned((type *)(src)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) @@ -66,6 +67,7 @@ do { \ if (__faulted) \ goto err_label; \ put_unaligned(*((type *)src), (type *)(dst)); \ + barrier(); \ current->thread.segv_continue = NULL; \ } while (0) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ce073150dc20a8..ef2272e92a4321 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -225,20 +225,20 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, panic("Failed 
to sync kernel TLBs: %d", err); goto out; } - else if (current->mm == NULL) { - if (current->pagefault_disabled) { - if (!mc) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault with pagefaults disabled but no mcontext"); - } - if (!current->thread.segv_continue) { - show_regs(container_of(regs, struct pt_regs, regs)); - panic("Segfault without recovery target"); - } - mc_set_rip(mc, current->thread.segv_continue); - current->thread.segv_continue = NULL; - goto out; + else if (current->pagefault_disabled) { + if (!mc) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault with pagefaults disabled but no mcontext"); } + if (!current->thread.segv_continue) { + show_regs(container_of(regs, struct pt_regs, regs)); + panic("Segfault without recovery target"); + } + mc_set_rip(mc, current->thread.segv_continue); + current->thread.segv_continue = NULL; + goto out; + } + else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index a05a6ecee75615..6de145f8fe3d93 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused) __func__, errno); } - CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO)); if (r < 0) printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", __func__, errno); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b9f378e05f6be..e21cca404943e7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2368,6 +2368,7 @@ config STRICT_SIGALTSTACK_SIZE config CFI_AUTO_DEFAULT bool "Attempt to use FineIBT by default at boot time" depends on FINEIBT + depends on !RUST || RUSTC_VERSION >= 108800 default y help Attempt to use FineIBT by default at boot time. 
If enabled, @@ -2710,6 +2711,18 @@ config MITIGATION_SSB of speculative execution in a similar way to the Meltdown and Spectre security vulnerabilities. +config MITIGATION_ITS + bool "Enable Indirect Target Selection mitigation" + depends on CPU_SUP_INTEL && X86_64 + depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK + select EXECMEM + default y + help + Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in + BPU on some Intel CPUs that may allow Spectre V2 style attacks. If + disabled, mitigation cannot be enabled via cmdline. + See + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 753b8763abaeb9..d4ce964d97137b 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -245,6 +245,76 @@ config MATOM endchoice +config CC_HAS_MARCH_NATIVE + # This flag might not be available in cross-compilers: + def_bool $(cc-option, -march=native) + # LLVM 18 has an easily triggered internal compiler error in core + # networking code with '-march=native' on certain systems: + # https://github.com/llvm/llvm-project/issues/72026 + # LLVM 19 introduces an optimization that resolves some high stack + # usage warnings that only appear wth '-march=native'. + depends on CC_IS_GCC || CLANG_VERSION >= 190100 + + +choice + prompt "x86_64 Compiler Build Optimization" + default GENERIC_CPU + +config X86_NATIVE_CPU + bool "Build and optimize for local/native CPU" + depends on X86_64 + depends on CC_HAS_MARCH_NATIVE + help + Optimize for the current CPU used to compile the kernel. + Use this option if you intend to build the kernel for your + local machine. + + Note that such a kernel might not work optimally on a + different x86 machine. + + If unsure, say N. + +config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 + help + Generic x86-64 CPU. + Runs equally well on all x86-64 CPUs. 
+ +config MZEN4 + bool "AMD Ryzen 4" + depends on (CC_IS_GCC && GCC_VERSION >= 130000) || (CC_IS_CLANG && CLANG_VERSION >= 160000) + help + Select this for AMD Family 19h Zen 4 processors. + + Enables -march=znver4 + +endchoice + +config X86_64_VERSION + int "x86-64 compiler ISA level" + range 1 4 + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 && GENERIC_CPU + help + Specify a specific x86-64 compiler ISA level. + + There are three x86-64 ISA levels that work on top of + the x86-64 baseline, namely: x86-64-v2 and x86-64-v3. + + x86-64-v2 brings support for vector instructions up to Streaming SIMD + Extensions 4.2 (SSE4.2) and Supplemental Streaming SIMD Extensions 3 + (SSSE3), the POPCNT instruction, and CMPXCHG16B. + + x86-64-v3 adds vector instructions up to AVX2, MOVBE, and additional + bit-manipulation instructions. + + x86-64-v4 is not included since the kernel does not use AVX512 instructions + + You can find the best version for your CPU by running one of the following: + /lib/ld-linux-x86-64.so.2 --help | grep supported + /lib64/ld-linux-x86-64.so.2 --help | grep supported + config X86_GENERIC bool "Generic x86 support" depends on X86_32 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 594723005d95d9..cbd234e9e743fd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -173,8 +173,25 @@ else # Use -mskip-rax-setup if supported. 
KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) +ifdef CONFIG_X86_NATIVE_CPU + KBUILD_CFLAGS += -march=native + KBUILD_RUSTFLAGS += -Ctarget-cpu=native +endif + +ifdef CONFIG_MZEN4 + KBUILD_CFLAGS += -march=znver4 + KBUILD_RUSTFLAGS += -Ctarget-cpu=znver4 +endif + +ifdef CONFIG_GENERIC_CPU +ifeq ($(CONFIG_X86_64_VERSION),1) KBUILD_CFLAGS += -march=x86-64 -mtune=generic KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64 -Ztune-cpu=generic +else + KBUILD_CFLAGS +=-march=x86-64-v$(CONFIG_X86_64_VERSION) + KBUILD_RUSTFLAGS += -Ctarget-cpu=x86-64-v$(CONFIG_X86_64_VERSION) +endif # CONFIG_X86_64_VERSION +endif # CONFIG_GENERIC_CPU KBUILD_CFLAGS += -mno-red-zone KBUILD_CFLAGS += -mcmodel=kernel diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 81f55da8196765..640fcac3af7459 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -59,7 +59,7 @@ KBUILD_CFLAGS += $(CONFIG_CC_IMPLICIT_FALLTHROUGH) $(obj)/bzImage: asflags-y := $(SVGA_MODE) quiet_cmd_image = BUILD $@ - cmd_image = cp $< $@; truncate -s %4K $@; cat $(obj)/vmlinux.bin >>$@ + cmd_image = (dd if=$< bs=4k conv=sync status=none; cat $(filter-out $<,$(real-prereqs))) >$@ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin FORCE $(call if_changed,image) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index dbba332e4a12d7..0e9f84ab4bdcd1 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -38,7 +38,7 @@ void arch_accept_memory(phys_addr_t start, phys_addr_t end) if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sev_snp_enabled()) { + } else if (early_is_sevsnp_guest()) { snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index bb55934c1cee70..0003e4416efd16 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -164,10 +164,7 @@ 
bool sev_snp_enabled(void) static void __page_state_change(unsigned long paddr, enum psc_op op) { - u64 val; - - if (!sev_snp_enabled()) - return; + u64 val, msr; /* * If private -> shared then invalidate the page before requesting the @@ -176,6 +173,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if (op == SNP_PAGE_STATE_SHARED) pvalidate_4k_page(paddr, paddr, false); + /* Save the current GHCB MSR value */ + msr = sev_es_rd_ghcb_msr(); + /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); VMGEXIT(); @@ -185,6 +185,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + /* Restore the GHCB MSR value */ + sev_es_wr_ghcb_msr(msr); + /* * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. 
@@ -195,11 +198,17 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) void snp_set_page_private(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); } void snp_set_page_shared(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); } @@ -223,56 +232,10 @@ static bool early_setup_ghcb(void) return true; } -static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, - phys_addr_t pa, phys_addr_t pa_end) -{ - struct psc_hdr *hdr; - struct psc_entry *e; - unsigned int i; - - hdr = &desc->hdr; - memset(hdr, 0, sizeof(*hdr)); - - e = desc->entries; - - i = 0; - while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { - hdr->end_entry = i; - - e->gfn = pa >> PAGE_SHIFT; - e->operation = SNP_PAGE_STATE_PRIVATE; - if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { - e->pagesize = RMP_PG_SIZE_2M; - pa += PMD_SIZE; - } else { - e->pagesize = RMP_PG_SIZE_4K; - pa += PAGE_SIZE; - } - - e++; - i++; - } - - if (vmgexit_psc(boot_ghcb, desc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pvalidate_pages(desc); - - return pa; -} - void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - struct snp_psc_desc desc = {}; - unsigned int i; - phys_addr_t pa; - - if (!boot_ghcb && !early_setup_ghcb()) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pa = start; - while (pa < end) - pa = __snp_accept_memory(&desc, pa, end); + for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE) + __page_state_change(pa, SNP_PAGE_STATE_PRIVATE); } void sev_es_shutdown_ghcb(void) @@ -682,3 +645,43 @@ void sev_prep_identity_maps(unsigned long top_level_pgt) sev_verify_cbit(top_level_pgt); } + +bool early_is_sevsnp_guest(void) +{ + static bool sevsnp; + + if (sevsnp) + return true; + + if (!(sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) + return false; + + sevsnp = true; + + if (!snp_vmpl) { + unsigned int eax, ebx, ecx, edx; + + /* + 
* CPUID Fn8000_001F_EAX[28] - SVSM support + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax & BIT(28)) { + struct msr m; + + /* Obtain the address of the calling area to use */ + boot_rdmsr(MSR_SVSM_CAA, &m); + boot_svsm_caa = (void *)m.q; + boot_svsm_caa_pa = m.q; + + /* + * The real VMPL level cannot be discovered, but the + * memory acceptance routines make no use of that so + * any non-zero value suffices here. + */ + snp_vmpl = U8_MAX; + } + } + return true; +} diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index fc725a981b093b..d3900384b8abb5 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -12,11 +12,15 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 sev_get_status(void); +bool early_is_sevsnp_guest(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 sev_get_status(void) { return 0; } +static inline bool early_is_sevsnp_guest(void) { return false; } #endif diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index b0c1a7a574974a..36beaac713c128 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end) set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); } +static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id) +{ + bool create = event != SVM_VMGEXIT_AP_DESTROY; + struct ghcb_state state; + unsigned long flags; + struct ghcb *ghcb; + int ret = 0; + + local_irq_save(flags); + + ghcb = __sev_get_ghcb(&state); + + vc_ghcb_invalidate(ghcb); + + if (create) + ghcb_set_rax(ghcb, vmsa->sev_features); + + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); + ghcb_set_sw_exit_info_1(ghcb, + ((u64)apic_id << 32) | + ((u64)snp_vmpl << 16) | + event); + 
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); + + sev_es_wr_ghcb_msr(__pa(ghcb)); + VMGEXIT(); + + if (!ghcb_sw_exit_info_1_is_valid(ghcb) || + lower_32_bits(ghcb->save.sw_exit_info_1)) { + pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY")); + ret = -EINVAL; + } + + __sev_put_ghcb(&state); + + local_irq_restore(flags); + + return ret; +} + +static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) +{ + int ret; + + if (snp_vmpl) { + struct svsm_call call = {}; + unsigned long flags; + + local_irq_save(flags); + + call.caa = this_cpu_read(svsm_caa); + call.rcx = __pa(va); + + if (make_vmsa) { + /* Protocol 0, Call ID 2 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); + call.rdx = __pa(caa); + call.r8 = apic_id; + } else { + /* Protocol 0, Call ID 3 */ + call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); + } + + ret = svsm_perform_call_protocol(&call); + + local_irq_restore(flags); + } else { + /* + * If the kernel runs at VMPL0, it can change the VMSA + * bit for a page using the RMPADJUST instruction. + * However, for the instruction to succeed it must + * target the permissions of a lesser privileged (higher + * numbered) VMPL level, so use VMPL1. 
+ */ + u64 attrs = 1; + + if (make_vmsa) + attrs |= RMPADJUST_VMSA_PAGE_BIT; + + ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + } + + return ret; +} + +static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) +{ + int err; + + err = snp_set_vmsa(vmsa, NULL, apic_id, false); + if (err) + pr_err("clear VMSA page failed (%u), leaking page\n", err); + else + free_page((unsigned long)vmsa); +} + static void set_pte_enc(pte_t *kpte, int level, void *va) { struct pte_enc_desc d = { @@ -1005,7 +1101,8 @@ static void unshare_all_memory(void) data = per_cpu(runtime_data, cpu); ghcb = (unsigned long)&data->ghcb_page; - if (addr <= ghcb && ghcb <= addr + size) { + /* Handle the case of a huge page containing the GHCB page */ + if (addr <= ghcb && ghcb < addr + size) { skipped_addr = true; break; } @@ -1055,11 +1152,70 @@ void snp_kexec_begin(void) pr_warn("Failed to stop shared<->private conversions\n"); } +/* + * Shutdown all APs except the one handling kexec/kdump and clearing + * the VMSA tag on AP's VMSA pages as they are not being used as + * VMSA page anymore. + */ +static void shutdown_all_aps(void) +{ + struct sev_es_save_area *vmsa; + int apic_id, this_cpu, cpu; + + this_cpu = get_cpu(); + + /* + * APs are already in HLT loop when enc_kexec_finish() callback + * is invoked. + */ + for_each_present_cpu(cpu) { + vmsa = per_cpu(sev_vmsa, cpu); + + /* + * The BSP or offlined APs do not have guest allocated VMSA + * and there is no need to clear the VMSA tag for this page. + */ + if (!vmsa) + continue; + + /* + * Cannot clear the VMSA tag for the currently running vCPU. + */ + if (this_cpu == cpu) { + unsigned long pa; + struct page *p; + + pa = __pa(vmsa); + /* + * Mark the VMSA page of the running vCPU as offline + * so that is excluded and not touched by makedumpfile + * while generating vmcore during kdump. 
+ */ + p = pfn_to_online_page(pa >> PAGE_SHIFT); + if (p) + __SetPageOffline(p); + continue; + } + + apic_id = cpuid_to_apicid[cpu]; + + /* + * Issue AP destroy to ensure AP gets kicked out of guest mode + * to allow using RMPADJUST to remove the VMSA tag on it's + * VMSA page. + */ + vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id); + snp_cleanup_vmsa(vmsa, apic_id); + } + + put_cpu(); +} + void snp_kexec_finish(void) { struct sev_es_runtime_data *data; + unsigned long size, addr; unsigned int level, cpu; - unsigned long size; struct ghcb *ghcb; pte_t *pte; @@ -1069,6 +1225,8 @@ void snp_kexec_finish(void) if (!IS_ENABLED(CONFIG_KEXEC_CORE)) return; + shutdown_all_aps(); + unshare_all_memory(); /* @@ -1085,54 +1243,11 @@ void snp_kexec_finish(void) ghcb = &data->ghcb_page; pte = lookup_address((unsigned long)ghcb, &level); size = page_level_size(level); - set_pte_enc(pte, level, (void *)ghcb); - snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE)); - } -} - -static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) -{ - int ret; - - if (snp_vmpl) { - struct svsm_call call = {}; - unsigned long flags; - - local_irq_save(flags); - - call.caa = this_cpu_read(svsm_caa); - call.rcx = __pa(va); - - if (make_vmsa) { - /* Protocol 0, Call ID 2 */ - call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); - call.rdx = __pa(caa); - call.r8 = apic_id; - } else { - /* Protocol 0, Call ID 3 */ - call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); - } - - ret = svsm_perform_call_protocol(&call); - - local_irq_restore(flags); - } else { - /* - * If the kernel runs at VMPL0, it can change the VMSA - * bit for a page using the RMPADJUST instruction. - * However, for the instruction to succeed it must - * target the permissions of a lesser privileged (higher - * numbered) VMPL level, so use VMPL1. 
- */ - u64 attrs = 1; - - if (make_vmsa) - attrs |= RMPADJUST_VMSA_PAGE_BIT; - - ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); + /* Handle the case of a huge page containing the GHCB page */ + addr = (unsigned long)ghcb & page_level_mask(level); + set_pte_enc(pte, level, (void *)addr); + snp_set_memory_private(addr, (size / PAGE_SIZE)); } - - return ret; } #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) @@ -1166,24 +1281,10 @@ static void *snp_alloc_vmsa_page(int cpu) return page_address(p + 1); } -static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) -{ - int err; - - err = snp_set_vmsa(vmsa, NULL, apic_id, false); - if (err) - pr_err("clear VMSA page failed (%u), leaking page\n", err); - else - free_page((unsigned long)vmsa); -} - static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *cur_vmsa, *vmsa; - struct ghcb_state state; struct svsm_ca *caa; - unsigned long flags; - struct ghcb *ghcb; u8 sipi_vector; int cpu, ret; u64 cr4; @@ -1297,33 +1398,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) } /* Issue VMGEXIT AP Creation NAE event */ - local_irq_save(flags); - - ghcb = __sev_get_ghcb(&state); - - vc_ghcb_invalidate(ghcb); - ghcb_set_rax(ghcb, vmsa->sev_features); - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); - ghcb_set_sw_exit_info_1(ghcb, - ((u64)apic_id << 32) | - ((u64)snp_vmpl << 16) | - SVM_VMGEXIT_AP_CREATE); - ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); - - sev_es_wr_ghcb_msr(__pa(ghcb)); - VMGEXIT(); - - if (!ghcb_sw_exit_info_1_is_valid(ghcb) || - lower_32_bits(ghcb->save.sw_exit_info_1)) { - pr_err("SNP AP Creation error\n"); - ret = -EINVAL; - } - - __sev_put_ghcb(&state); - - local_irq_restore(flags); - - /* Perform cleanup if there was an error */ + ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); if (ret) { snp_cleanup_vmsa(vmsa, apic_id); vmsa = NULL; diff --git a/arch/x86/entry/entry.S 
b/arch/x86/entry/entry.S index d3caa31240ede5..175958b02f2bfd 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -17,19 +17,20 @@ .pushsection .noinstr.text, "ax" -SYM_FUNC_START(entry_ibpb) +/* Clobbers AX, CX, DX */ +SYM_FUNC_START(write_ibpb) ANNOTATE_NOENDBR movl $MSR_IA32_PRED_CMD, %ecx - movl $PRED_CMD_IBPB, %eax + movl _ASM_RIP(x86_pred_cmd), %eax xorl %edx, %edx wrmsr /* Make sure IBPB clears return stack preductions too. */ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET RET -SYM_FUNC_END(entry_ibpb) +SYM_FUNC_END(write_ibpb) /* For KVM */ -EXPORT_SYMBOL_GPL(entry_ibpb); +EXPORT_SYMBOL_GPL(write_ibpb); .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f40bdf97d390a7..ed04a968cc7d00 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1525,7 +1525,9 @@ SYM_CODE_END(rewind_stack_and_make_dead) * ORC to unwind properly. * * The alignment is for performance and not for safety, and may be safely - * refactored in the future if needed. + * refactored in the future if needed. The .skips are for safety, to ensure + * that all RETs are in the second half of a cacheline to mitigate Indirect + * Target Selection, rather than taking the slowpath via its_return_thunk. */ SYM_FUNC_START(clear_bhb_loop) ANNOTATE_NOENDBR @@ -1536,10 +1538,22 @@ SYM_FUNC_START(clear_bhb_loop) call 1f jmp 5f .align 64, 0xcc + /* + * Shift instructions so that the RET is in the upper half of the + * cacheline and don't take the slowpath to its_return_thunk. + */ + .skip 32 - (.Lret1 - 1f), 0xcc ANNOTATE_INTRA_FUNCTION_CALL 1: call 2f - RET +.Lret1: RET .align 64, 0xcc + /* + * As above shift instructions for RET at .Lret2 as well. + * + * This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc + * but some Clang versions (e.g. 18) don't like this. 
+ */ + .skip 32 - 18, 0xcc 2: movl $5, %eax 3: jmp 4f nop @@ -1547,7 +1561,7 @@ SYM_FUNC_START(clear_bhb_loop) jnz 3b sub $1, %ecx jnz 1b - RET +.Lret2: RET 5: lfence pop %rbp RET diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 49c26ce2b11522..a6fa01ef35a10e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -38,7 +38,6 @@ struct amd_uncore_ctx { int refcnt; int cpu; struct perf_event **events; - struct hlist_node node; }; struct amd_uncore_pmu { @@ -890,6 +889,39 @@ static void amd_uncore_umc_start(struct perf_event *event, int flags) perf_event_update_userpage(event); } +static void amd_uncore_umc_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new, shift; + s64 delta; + + shift = COUNTER_SHIFT + 1; + prev = local64_read(&hwc->prev_count); + + /* + * UMC counters do not have RDPMC assignments. Read counts directly + * from the corresponding PERF_CTR. + */ + rdmsrl(hwc->event_base, new); + + /* + * Unlike the other uncore counters, UMC counters saturate and set the + * Overflow bit (bit 48) on overflow. Since they do not roll over, + * proactively reset the corresponding PERF_CTR when bit 47 is set so + * that the counter never gets a chance to saturate. 
+ */ + if (new & BIT_ULL(63 - COUNTER_SHIFT)) { + wrmsrl(hwc->event_base, 0); + local64_set(&hwc->prev_count, 0); + } else { + local64_set(&hwc->prev_count, new); + } + + delta = (new << shift) - (prev << shift); + delta >>= shift; + local64_add(delta, &event->count); +} + static void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { @@ -968,7 +1000,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) .del = amd_uncore_del, .start = amd_uncore_umc_start, .stop = amd_uncore_stop, - .read = amd_uncore_read, + .read = amd_uncore_umc_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .module = THIS_MODULE, }; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6866cc5acb0b57..139ad80d1df34f 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -629,7 +629,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) @@ -754,7 +754,7 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { int i; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09d2d66c9f21fa..c5f385413392b1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3049,7 +3049,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; - u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -3093,7 +3092,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); 
static_call(x86_pmu_drain_pebs)(regs, &data); - status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* * PMI throttle may be triggered, which stops the PEBS event. @@ -3104,6 +3102,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (pebs_enabled != cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + /* + * Above PEBS handler (PEBS counters snapshotting) has updated fixed + * counter 3 and perf metrics counts if they are in counter group, + * unnecessary to update again. + */ + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; } /* @@ -3123,6 +3130,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) static_call(intel_pmu_update_topdown_event)(NULL, NULL); } + status &= hybrid(cpuc->pmu, intel_ctrl); + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status @@ -4386,7 +4395,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { @@ -7305,8 +7314,17 @@ __init int intel_pmu_init(void) name = "meteorlake_hybrid"; break; + case INTEL_PANTHERLAKE_L: + pr_cont("Pantherlake Hybrid events, "); + name = "pantherlake_hybrid"; + goto lnl_common; + case INTEL_LUNARLAKE_M: case INTEL_ARROWLAKE: + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + + lnl_common: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = lnl_latency_data; @@ -7328,8 +7346,6 @@ __init int intel_pmu_init(void) intel_pmu_init_skt(&pmu->pmu); intel_pmu_pebs_data_source_lnl(); - pr_cont("Lunarlake Hybrid 
events, "); - name = "lunarlake_hybrid"; break; case INTEL_ARROWLAKE_H: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1f7e1a692a7a48..8d86e91bd5e5c3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1399,8 +1399,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. */ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -2123,7 +2125,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } @@ -2377,8 +2379,25 @@ __intel_pmu_pebs_last_event(struct perf_event *event, */ intel_pmu_save_and_restart_reload(event, count); } - } else - intel_pmu_save_and_restart(event); + } else { + /* + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. 
+ */ + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); + } } static __always_inline void @@ -2446,8 +2465,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_ setup_pebs_fixed_sample_data); } -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask) { + u64 pebs_enabled = cpuc->pebs_enabled & mask; struct perf_event *event; int bit; @@ -2458,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int * It needs to call intel_pmu_save_and_restart_reload() to * update the event->count for this case. */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) { event = cpuc->events[bit]; if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_save_and_restart_reload(event, 0); @@ -2493,7 +2513,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d } if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, size); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2607,7 +2627,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 60973c209c0e64..76d96df1475a1c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4891,28 +4891,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO 
BANDWIDTH IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; @@ 
-5485,37 +5485,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5523,7 
+5492,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -6320,69 +6289,13 @@ static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - 
INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = 
spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2c0ce0e9545e50..46d120597babe5 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -110,14 +110,21 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); } static inline bool is_pebs_counter_event_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_PEBS_CNTR; + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); } struct amd_nb { diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index ddeb40930bc802..3ca16e1dbbb833 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void) return hypercall_msr.enable; } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + +int hv_apicid_to_vp_index(u32 apic_id) +{ + u64 control; + u64 status; + unsigned long irq_flags; + struct hv_get_vp_from_apic_id_in *input; + u32 *output, ret; + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = HV_PARTITION_ID_SELF; + input->apic_ids[0] = apic_id; + + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_INDEX_FROM_APIC_ID; + status = hv_do_hypercall(control, input, output); + ret = 
output[0]; + + local_irq_restore(irq_flags); + + if (!hv_result_success(status)) { + pr_err("failed to get vp index from apic id %d, status %#llx\n", + apic_id, status); + return -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_index); diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 13242ed8ff16fe..5d59e1e05e6491 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -206,41 +206,9 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) return ret; } -static int hv_vtl_apicid_to_vp_id(u32 apic_id) -{ - u64 control; - u64 status; - unsigned long irq_flags; - struct hv_get_vp_from_apic_id_in *input; - u32 *output, ret; - - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - memset(input, 0, sizeof(*input)); - input->partition_id = HV_PARTITION_ID_SELF; - input->apic_ids[0] = apic_id; - - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; - status = hv_do_hypercall(control, input, output); - ret = output[0]; - - local_irq_restore(irq_flags); - - if (!hv_result_success(status)) { - pr_err("failed to get vp id from apic id %d, status %#llx\n", - apic_id, status); - return -EINVAL; - } - - return ret; -} - static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { - int vp_id, cpu; + int vp_index, cpu; /* Find the logical CPU for the APIC ID */ for_each_present_cpu(cpu) { @@ -251,18 +219,18 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) return -EINVAL; pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); - vp_id = hv_vtl_apicid_to_vp_id(apicid); + vp_index = hv_apicid_to_vp_index(apicid); - if (vp_id < 0) { + if (vp_index < 0) { pr_err("Couldn't find CPU with APIC ID %d\n", apicid); return -EINVAL; } - if (vp_id > ms_hyperv.max_vp_index) { - pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid); + if (vp_index > 
ms_hyperv.max_vp_index) { + pr_err("Invalid CPU id %d for APIC ID %d\n", vp_index, apicid); return -EINVAL; } - return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip); + return hv_vtl_bringup_vcpu(vp_index, cpu, start_eip); } int __init hv_vtl_early_init(void) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 77bf05f06b9efa..0cc239cdb4dad8 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -288,7 +289,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); @@ -297,10 +298,27 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) u64 ret, retry = 5; struct hv_enable_vp_vtl *start_vp_input; unsigned long flags; + int cpu, vp_index; if (!vmsa) return -ENOMEM; + /* Find the Hyper-V VP index which might be not the same as APIC ID */ + vp_index = hv_apicid_to_vp_index(apic_id); + if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) + return -EINVAL; + + /* + * Find the Linux CPU number for addressing the per-CPU data, and it + * might not be the same as APIC ID. 
+ */ + for_each_present_cpu(cpu) { + if (arch_match_cpu_phys_id(cpu, apic_id)) + break; + } + if (cpu >= nr_cpu_ids) + return -EINVAL; + native_store_gdt(&gdtr); vmsa->gdtr.base = gdtr.address; @@ -348,7 +366,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; memset(start_vp_input, 0, sizeof(*start_vp_input)); start_vp_input->partition_id = -1; - start_vp_input->vp_index = cpu; + start_vp_input->vp_index = vp_index; start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 4a37a8bd87fdfa..f2294784babc30 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -6,6 +6,7 @@ #include #include #include +#include #define ALT_FLAGS_SHIFT 16 @@ -124,6 +125,37 @@ static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, } #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_init_mod(struct module *mod); +extern void its_fini_mod(struct module *mod); +extern void its_free_mod(struct module *mod); +extern u8 *its_static_thunk(int reg); +#else /* CONFIG_MITIGATION_ITS */ +static inline void its_init_mod(struct module *mod) { } +static inline void its_fini_mod(struct module *mod) { } +static inline void its_free_mod(struct module *mod) { } +static inline u8 *its_static_thunk(int reg) +{ + WARN_ONCE(1, "ITS not compiled in"); + + return NULL; +} +#endif + +#if defined(CONFIG_MITIGATION_RETHUNK) && defined(CONFIG_OBJTOOL) +extern bool cpu_wants_rethunk(void); +extern bool cpu_wants_rethunk_at(void *addr); +#else +static __always_inline bool cpu_wants_rethunk(void) +{ + return false; +} +static __always_inline bool cpu_wants_rethunk_at(void *addr) +{ + return false; +} +#endif + #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, diff --git 
a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67b9..30144ef9ef02fb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -75,7 +75,7 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -/* Free ( 3*32+ 6) */ +#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */ /* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ @@ -481,6 +481,7 @@ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 9) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -533,4 +534,6 @@ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_ITS X86_BUG(1*32 + 6) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 3a97a7eefb512e..be10c188614fe2 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -126,6 +126,8 @@ #define 
INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */ #define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) +#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */ + /* "Hybrid" Processors (P-Core/E-Core) */ #define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a884ab544335e7..7bc174a1f1cb8c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -1472,8 +1473,13 @@ struct kvm_arch { struct once nx_once; #ifdef CONFIG_X86_64 - /* The number of TDP MMU pages across all roots. */ +#ifdef CONFIG_KVM_PROVE_MMU + /* + * The number of TDP MMU pages across all roots. Used only to sanity + * check that KVM isn't leaking TDP MMU pages. + */ atomic64_t tdp_mmu_pages; +#endif /* * List of struct kvm_mmu_pages being used as roots. @@ -2418,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 695e569159c1d1..be7cddc414e4fb 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -17,10 +17,12 @@ struct ucode_cpu_info { void load_ucode_bsp(void); void load_ucode_ap(void); void microcode_bsp_resume(void); +bool __init microcode_loader_disabled(void); #else static inline void load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void microcode_bsp_resume(void) { } +static inline bool __init microcode_loader_disabled(void) { return false; } #endif extern unsigned long initrd_start_early; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index bab5ccfc60a748..0b9a3a307d0655 
100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; } #endif #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) @@ -306,6 +306,7 @@ static __always_inline u64 hv_raw_get_msr(unsigned int reg) { return __rdmsr(reg); } +int hv_apicid_to_vp_index(u32 apic_id); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -327,6 +328,7 @@ static inline void hv_set_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_msr(unsigned int reg) { return 0; } static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } +static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; } #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e6134ef2263d50..e7d2f460fcc699 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -211,6 +211,14 @@ * VERW clears CPU Register * File. */ +#define ARCH_CAP_ITS_NO BIT_ULL(62) /* + * Not susceptible to + * Indirect Target Selection. + * This bit is not set by + * HW, but is synthesized by + * VMMs for guests to know + * their affected status. 
+ */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index ce857ef54cf158..54dc313bcdf018 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -116,13 +116,10 @@ static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { - if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { - mb(); - clflush((void *)¤t_thread_info()->flags); - mb(); - } + const void *addr = ¤t_thread_info()->flags; - __monitor((void *)¤t_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { if (ecx & 1) { diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8a5cc8e70439e1..7d04ade3354115 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -269,7 +269,7 @@ * typically has NO_MELTDOWN). * * While retbleed_untrain_ret() doesn't clobber anything but requires stack, - * entry_ibpb() will clobber AX, CX, DX. + * write_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point * where we have a stack but before any RET instruction. 
@@ -279,7 +279,7 @@ VALIDATE_UNRET_END CALL_UNTRAIN_RET ALTERNATIVE_2 "", \ - "call entry_ibpb", \ibpb_feature, \ + "call write_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif .endm @@ -336,10 +336,14 @@ #else /* __ASSEMBLER__ */ +#define ITS_THUNK_SIZE 64 + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; +typedef u8 its_thunk_t[ITS_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +extern its_thunk_t __x86_indirect_its_thunk_array[]; #ifdef CONFIG_MITIGATION_RETHUNK extern void __x86_return_thunk(void); @@ -363,12 +367,18 @@ static inline void srso_return_thunk(void) {} static inline void srso_alias_return_thunk(void) {} #endif +#ifdef CONFIG_MITIGATION_ITS +extern void its_return_thunk(void); +#else +static inline void its_return_thunk(void) {} +#endif + extern void retbleed_return_thunk(void); extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); -extern void entry_ibpb(void); +extern void write_ibpb(void); #ifdef CONFIG_X86_64 extern void clear_bhb_loop(void); @@ -514,11 +524,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } -extern u64 x86_pred_cmd; - static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_IBPB); + asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB) + : ASM_CALL_CONSTRAINT + :: "rax", "rcx", "rdx", "memory"); } /* The Intel SPEC CTRL MSR base value cache */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b3ab80a03365cf..5e883b397ff3f2 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -26,6 +26,7 @@ struct pci_sysdata { #if IS_ENABLED(CONFIG_VMD) struct pci_dev *vmd_dev; /* VMD Device if in Intel VMD domain */ 
#endif + struct pci_dev *nvme_remap_dev; /* AHCI Device if NVME remapped bus */ }; extern int pci_routeirq; @@ -69,6 +70,11 @@ static inline bool is_vmd(struct pci_bus *bus) #define is_vmd(bus) false #endif /* CONFIG_VMD */ +static inline bool is_nvme_remap(struct pci_bus *bus) +{ + return to_pci_sysdata(bus)->nvme_remap_dev != NULL; +} + /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a33147520044b4..c88691b15f3c67 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,8 @@ #include /* for struct page */ #include +#include + #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include @@ -29,16 +31,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {} static inline void paravirt_release_p4d(unsigned long pfn) {} #endif -#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* - * Instead of one PGD, we acquire two PGDs. Being order-1, it is - * both 8k in size and 8k-aligned. That lets us just flip bit 12 - * in a pointer to swap between the two 4k halves. + * In case of Page Table Isolation active, we acquire two PGDs instead of one. + * Being order-1, it is both 8k in size and 8k-aligned. That lets us just + * flip bit 12 in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +static inline unsigned int pgd_allocation_order(void) +{ + if (cpu_feature_enabled(X86_FEATURE_PTI)) + return 1; + return 0; +} /* * Allocate and free page tables. 
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index acb85b9346d84b..0020d77a080001 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -116,7 +116,7 @@ enum psc_op { #define GHCB_MSR_VMPL_REQ 0x016 #define GHCB_MSR_VMPL_REQ_LEVEL(v) \ /* GHCBData[39:32] */ \ - (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \ + ((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \ /* GHCBDdata[11:0] */ \ GHCB_MSR_VMPL_REQ) diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4c5..8727c7e21dd1e6 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. 
+ */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 55a5e656e4b9ea..4f84d421d1cf29 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -16,23 +16,23 @@ #ifdef __ASSEMBLER__ #define ASM_CLAC \ - ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "clac", X86_FEATURE_SMAP + ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ - ALTERNATIVE __stringify(ANNOTATE_IGNORE_ALTERNATIVE), "stac", X86_FEATURE_SMAP + ALTERNATIVE "", "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP); + alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP); + alternative("", "stac", X86_FEATURE_SMAP); } static __always_inline unsigned long smap_save(void) @@ -59,9 +59,9 @@ static __always_inline void smap_restore(unsigned long flags) /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "clac", X86_FEATURE_SMAP) + ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "stac", X86_FEATURE_SMAP) + ALTERNATIVE("", "stac", X86_FEATURE_SMAP) #define ASM_CLAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index dae6a73be40e1c..9fa321a95eb33f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -1729,6 +1731,15 @@ int __init acpi_mps_check(void) { #if 
defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) /* mptable code is not built-in*/ + + /* + * Xen disables ACPI in PV DomU guests but it still emulates APIC and + * supports SMP. Returning early here ensures that APIC is not disabled + * unnecessarily and the guest is not limited to a single vCPU. + */ + if (xen_pv_domain() && !xen_initial_domain()) + return 0; + if (acpi_disabled || acpi_noirq) { pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n"); return 1; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index bf82c6f7d69055..45bcff181cbae9 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,8 @@ #include #include #include +#include +#include int __read_mostly alternatives_patched; @@ -124,6 +127,171 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] = #endif }; +#ifdef CONFIG_FINEIBT +static bool cfi_paranoid __ro_after_init; +#endif + +#ifdef CONFIG_MITIGATION_ITS + +#ifdef CONFIG_MODULES +static struct module *its_mod; +#endif +static void *its_page; +static unsigned int its_offset; + +/* Initialize a thunk with the "jmp *reg; int3" instructions. */ +static void *its_init_thunk(void *thunk, int reg) +{ + u8 *bytes = thunk; + int offset = 0; + int i = 0; + +#ifdef CONFIG_FINEIBT + if (cfi_paranoid) { + /* + * When ITS uses indirect branch thunk the fineibt_paranoid + * caller sequence doesn't fit in the caller site. So put the + * remaining part of the sequence ( + JNE) into the ITS + * thunk. 
+ */ + bytes[i++] = 0xea; /* invalid instruction */ + bytes[i++] = 0x75; /* JNE */ + bytes[i++] = 0xfd; + + offset = 1; + } +#endif + + if (reg >= 8) { + bytes[i++] = 0x41; /* REX.B prefix */ + reg -= 8; + } + bytes[i++] = 0xff; + bytes[i++] = 0xe0 + reg; /* jmp *reg */ + bytes[i++] = 0xcc; + + return thunk + offset; +} + +#ifdef CONFIG_MODULES +void its_init_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + mutex_lock(&text_mutex); + its_mod = mod; + its_page = NULL; +} + +void its_fini_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + WARN_ON_ONCE(its_mod != mod); + + its_mod = NULL; + its_page = NULL; + mutex_unlock(&text_mutex); + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +void its_free_mod(struct module *mod) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + + for (int i = 0; i < mod->its_num_pages; i++) { + void *page = mod->its_page_array[i]; + execmem_free(page); + } + kfree(mod->its_page_array); +} +#endif /* CONFIG_MODULES */ + +static void *its_alloc(void) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + + if (!page) + return NULL; + +#ifdef CONFIG_MODULES + if (its_mod) { + void *tmp = krealloc(its_mod->its_page_array, + (its_mod->its_num_pages+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + its_mod->its_page_array = tmp; + its_mod->its_page_array[its_mod->its_num_pages++] = page; + + execmem_make_temp_rw(page, PAGE_SIZE); + } +#endif /* CONFIG_MODULES */ + + return no_free_ptr(page); +} + +static void *its_allocate_thunk(int reg) +{ + int size = 3 + (reg / 8); + void *thunk; + +#ifdef CONFIG_FINEIBT + /* + * The ITS thunk contains an indirect jump and an int3 instruction so + * its size is 3 or 4 bytes depending on the register used. 
If CFI + * paranoid is used then 3 extra bytes are added in the ITS thunk to + * complete the fineibt_paranoid caller sequence. + */ + if (cfi_paranoid) + size += 3; +#endif + + if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) { + its_page = its_alloc(); + if (!its_page) { + pr_err("ITS page allocation failed\n"); + return NULL; + } + memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE); + its_offset = 32; + } + + /* + * If the indirect branch instruction will be in the lower half + * of a cacheline, then update the offset to reach the upper half. + */ + if ((its_offset + size - 1) % 64 < 32) + its_offset = ((its_offset - 1) | 0x3F) + 33; + + thunk = its_page + its_offset; + its_offset += size; + + return its_init_thunk(thunk, reg); +} + +u8 *its_static_thunk(int reg) +{ + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + +#ifdef CONFIG_FINEIBT + /* Paranoid thunk starts 2 bytes before */ + if (cfi_paranoid) + return thunk - 2; +#endif + return thunk; +} + +#endif + /* * Nomenclature for variable names to simplify and clarify this code and ease * any potential staring at it: @@ -581,7 +749,8 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } -static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes, + void *call_dest, void *jmp_dest) { u8 op = insn->opcode.bytes[0]; int i = 0; @@ -602,7 +771,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 switch (op) { case CALL_INSN_OPCODE: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_call_thunk_array[reg], + call_dest, CALL_INSN_SIZE); i += CALL_INSN_SIZE; break; @@ -610,7 +779,7 @@ static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 case JMP32_INSN_OPCODE: clang_jcc: __text_gen_insn(bytes+i, op, addr+i, - __x86_indirect_jump_thunk_array[reg], + jmp_dest, JMP32_INSN_SIZE); i += JMP32_INSN_SIZE; break; @@ -625,6 +794,48 @@ static int 
emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 return i; } +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + return __emit_trampoline(addr, insn, bytes, + __x86_indirect_call_thunk_array[reg], + __x86_indirect_jump_thunk_array[reg]); +} + +#ifdef CONFIG_MITIGATION_ITS +static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + u8 *thunk = __x86_indirect_its_thunk_array[reg]; + u8 *tmp = its_allocate_thunk(reg); + + if (tmp) + thunk = tmp; + + return __emit_trampoline(addr, insn, bytes, thunk, thunk); +} + +/* Check if an indirect branch is at ITS-unsafe address */ +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return false; + + /* Indirect branch opcode is 2 or 3 bytes depending on reg */ + addr += 1 + reg / 8; + + /* Lower-half of the cacheline? */ + return !(addr & 0x20); +} +#else /* CONFIG_MITIGATION_ITS */ + +#ifdef CONFIG_FINEIBT +static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) +{ + return false; +} +#endif + +#endif /* CONFIG_MITIGATION_ITS */ + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -699,6 +910,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) bytes[i++] = 0xe8; /* LFENCE */ } +#ifdef CONFIG_MITIGATION_ITS + /* + * Check if the address of last byte of emitted-indirect is in + * lower-half of the cacheline. Such branches need ITS mitigation. 
+ */ + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg)) + return emit_its_trampoline(addr, insn, reg, bytes); +#endif + ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret; @@ -732,6 +952,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) int len, ret; u8 bytes[16]; u8 op1, op2; + u8 *dest; ret = insn_decode_kernel(&insn, addr); if (WARN_ON_ONCE(ret < 0)) @@ -748,6 +969,12 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) case CALL_INSN_OPCODE: case JMP32_INSN_OPCODE: + /* Check for cfi_paranoid + ITS */ + dest = addr + insn.length + insn.immediate.value; + if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { + WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); + continue; + } break; case 0x0f: /* escape */ @@ -775,6 +1002,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_MITIGATION_RETHUNK +bool cpu_wants_rethunk(void) +{ + return cpu_feature_enabled(X86_FEATURE_RETHUNK); +} + +bool cpu_wants_rethunk_at(void *addr) +{ + if (!cpu_feature_enabled(X86_FEATURE_RETHUNK)) + return false; + if (x86_return_thunk != its_return_thunk) + return true; + + return !((unsigned long)addr & 0x20); +} + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -791,7 +1033,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) int i = 0; /* Patch the custom return thunks... 
*/ - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk_at(addr)) { i = JMP32_INSN_SIZE; __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); } else { @@ -808,7 +1050,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { s32 *s; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) static_call_force_reinit(); for (s = start; s < end; s++) { @@ -1022,8 +1264,6 @@ int cfi_get_func_arity(void *func) static bool cfi_rand __ro_after_init = true; static u32 cfi_seed __ro_after_init; -static bool cfi_paranoid __ro_after_init = false; - /* * Re-hash the CFI hash with a boot-time seed while making sure the result is * not a valid ENDBR instruction. @@ -1436,6 +1676,19 @@ static int cfi_rand_callers(s32 *start, s32 *end) return 0; } +static int emit_paranoid_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + u8 *thunk = (void *)__x86_indirect_its_thunk_array[reg] - 2; + +#ifdef CONFIG_MITIGATION_ITS + u8 *tmp = its_allocate_thunk(reg); + if (tmp) + thunk = tmp; +#endif + + return __emit_trampoline(addr, insn, bytes, thunk, thunk); +} + static int cfi_rewrite_callers(s32 *start, s32 *end) { s32 *s; @@ -1477,9 +1730,14 @@ static int cfi_rewrite_callers(s32 *start, s32 *end) memcpy(bytes, fineibt_paranoid_start, fineibt_paranoid_size); memcpy(bytes + fineibt_caller_hash, &hash, 4); - ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); - if (WARN_ON_ONCE(ret != 3)) - continue; + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + fineibt_paranoid_ind, 11)) { + emit_paranoid_trampoline(addr + fineibt_caller_size, + &insn, 11, bytes + fineibt_caller_size); + } else { + ret = emit_indirect(op, 11, bytes + fineibt_paranoid_ind); + if (WARN_ON_ONCE(ret != 3)) + continue; + } text_poke_early(addr, bytes, fineibt_paranoid_size); } @@ -1706,29 +1964,66 @@ static bool decode_fineibt_bhi(struct pt_regs *regs, unsigned long *target, u32 return false; } +static bool 
is_paranoid_thunk(unsigned long addr) +{ + u32 thunk; + + __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); + return (thunk & 0x00FFFFFF) == 0xfd75ea; + +Efault: + return false; +} + /* * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] - * sequence. + * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS + * thunk. */ static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) { unsigned long addr = regs->ip - fineibt_paranoid_ud; - u32 hash; - if (!cfi_paranoid || !is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) + if (!cfi_paranoid) return false; - __get_kernel_nofault(&hash, addr + fineibt_caller_hash, u32, Efault); - *target = regs->r11 + fineibt_preamble_size; - *type = regs->r10; + if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { + *target = regs->r11 + fineibt_preamble_size; + *type = regs->r10; + + /* + * Since the trapping instruction is the exact, but LOCK prefixed, + * Jcc.d8 that got us here, the normal fixup will work. + */ + return true; + } /* - * Since the trapping instruction is the exact, but LOCK prefixed, - * Jcc.d8 that got us here, the normal fixup will work. 
+ * The cfi_paranoid + ITS thunk combination results in: + * + * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d + * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d + * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 + * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 + * + * Where the paranoid_thunk looks like: + * + * 1d: (bad) + * __x86_indirect_paranoid_thunk_r11: + * 1e: 75 fd jne 1d + * __x86_indirect_its_thunk_r11: + * 20: 41 ff eb jmp *%r11 + * 23: cc int3 + * */ - return true; + if (is_paranoid_thunk(regs->ip)) { + *target = regs->r11 + fineibt_preamble_size; + *type = regs->r10; + + regs->ip = *target; + return true; + } -Efault: return false; } @@ -2031,6 +2326,8 @@ static noinline void __init alt_reloc_selftest(void) void __init alternative_instructions(void) { + u64 ibt; + int3_selftest(); /* @@ -2057,6 +2354,9 @@ void __init alternative_instructions(void) */ paravirt_set_cap(); + /* Keep CET-IBT disabled until caller/callee are patched */ + ibt = ibt_save(/*disable*/ true); + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, __cfi_sites, __cfi_sites_end, true); @@ -2080,6 +2380,8 @@ void __init alternative_instructions(void) */ apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + ibt_restore(ibt); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 79569f72b8ee50..4e06baab40bb3f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -472,6 +472,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; + case 0x50 ... 0x5f: + case 0x90 ... 0xaf: + case 0xc0 ... 
0xcf: + setup_force_cpu_cap(X86_FEATURE_ZEN6); + break; default: goto warn; } @@ -805,6 +810,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static const struct x86_cpu_id erratum_1386_microcode[] = { X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x01), 0x2, 0x2, 0x0800126e), X86_MATCH_VFM_STEPS(VFM_MAKE(X86_VENDOR_AMD, 0x17, 0x31), 0x0, 0x0, 0x08301052), + {} }; static void fix_erratum_1386(struct cpuinfo_x86 *c) @@ -868,6 +874,16 @@ static void init_amd_zen1(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); + + /* + * Turn off the Instructions Retired free counter on machines that are + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (c->x86_model < 0x30) { + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + clear_cpu_cap(c, X86_FEATURE_IRPERF); + } } static bool cpu_has_zenbleed_microcode(void) @@ -1051,13 +1067,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_feature_enabled(X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - /* - * Turn on the Instructions Retired free counter on machines not - * susceptible to erratum #1054 "Instructions Retired Performance - * Counter May Be Inaccurate". 
- */ - if (cpu_has(c, X86_FEATURE_IRPERF) && - (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) + /* Enable the Instructions Retired free counter */ + if (cpu_has(c, X86_FEATURE_IRPERF)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4386aa6c69e12c..8596ce85026c0d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -49,6 +49,7 @@ static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); static void __init srso_select_mitigation(void); static void __init gds_select_mitigation(void); +static void __init its_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -59,7 +60,6 @@ DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; -EXPORT_SYMBOL_GPL(x86_pred_cmd); static u64 __ro_after_init x86_arch_cap_msr; @@ -67,6 +67,14 @@ static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; +static void __init set_return_thunk(void *thunk) +{ + if (x86_return_thunk != __x86_return_thunk) + pr_warn("x86/bugs: return thunk changed\n"); + + x86_return_thunk = thunk; +} + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { @@ -179,6 +187,7 @@ void __init cpu_select_mitigations(void) */ srso_select_mitigation(); gds_select_mitigation(); + its_select_mitigation(); } /* @@ -1119,7 +1128,7 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); - x86_return_thunk = retbleed_return_thunk; + set_return_thunk(retbleed_return_thunk); if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -1142,7 +1151,7 @@ static void 
__init retbleed_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RETHUNK); /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ @@ -1154,7 +1163,7 @@ static void __init retbleed_select_mitigation(void) setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); - x86_return_thunk = call_depth_return_thunk; + set_return_thunk(call_depth_return_thunk); break; default: @@ -1188,6 +1197,145 @@ static void __init retbleed_select_mitigation(void) pr_info("%s\n", retbleed_strings[retbleed_mitigation]); } +#undef pr_fmt +#define pr_fmt(fmt) "ITS: " fmt + +enum its_mitigation_cmd { + ITS_CMD_OFF, + ITS_CMD_ON, + ITS_CMD_VMEXIT, + ITS_CMD_RSB_STUFF, +}; + +enum its_mitigation { + ITS_MITIGATION_OFF, + ITS_MITIGATION_VMEXIT_ONLY, + ITS_MITIGATION_ALIGNED_THUNKS, + ITS_MITIGATION_RETPOLINE_STUFF, +}; + +static const char * const its_strings[] = { + [ITS_MITIGATION_OFF] = "Vulnerable", + [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected", + [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks", + [ITS_MITIGATION_RETPOLINE_STUFF] = "Mitigation: Retpolines, Stuffing RSB", +}; + +static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS; + +static enum its_mitigation_cmd its_cmd __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_ITS) ? 
ITS_CMD_ON : ITS_CMD_OFF; + +static int __init its_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) { + pr_err("Mitigation disabled at compile time, ignoring option (%s)", str); + return 0; + } + + if (!strcmp(str, "off")) { + its_cmd = ITS_CMD_OFF; + } else if (!strcmp(str, "on")) { + its_cmd = ITS_CMD_ON; + } else if (!strcmp(str, "force")) { + its_cmd = ITS_CMD_ON; + setup_force_cpu_bug(X86_BUG_ITS); + } else if (!strcmp(str, "vmexit")) { + its_cmd = ITS_CMD_VMEXIT; + } else if (!strcmp(str, "stuff")) { + its_cmd = ITS_CMD_RSB_STUFF; + } else { + pr_err("Ignoring unknown indirect_target_selection option (%s).", str); + } + + return 0; +} +early_param("indirect_target_selection", its_parse_cmdline); + +static void __init its_select_mitigation(void) +{ + enum its_mitigation_cmd cmd = its_cmd; + + if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) { + its_mitigation = ITS_MITIGATION_OFF; + return; + } + + /* Retpoline+CDT mitigates ITS, bail out */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + boot_cpu_has(X86_FEATURE_CALL_DEPTH)) { + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + goto out; + } + + /* Exit early to avoid irrelevant warnings */ + if (cmd == ITS_CMD_OFF) { + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (spectre_v2_enabled == SPECTRE_V2_NONE) { + pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (!IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || + !IS_ENABLED(CONFIG_MITIGATION_RETHUNK)) { + pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) { + pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + pr_err("WARNING: 
ITS mitigation is not compatible with lfence mitigation\n"); + its_mitigation = ITS_MITIGATION_OFF; + goto out; + } + + if (cmd == ITS_CMD_RSB_STUFF && + (!boot_cpu_has(X86_FEATURE_RETPOLINE) || !IS_ENABLED(CONFIG_MITIGATION_CALL_DEPTH_TRACKING))) { + pr_err("RSB stuff mitigation not supported, using default\n"); + cmd = ITS_CMD_ON; + } + + switch (cmd) { + case ITS_CMD_OFF: + its_mitigation = ITS_MITIGATION_OFF; + break; + case ITS_CMD_VMEXIT: + if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) { + its_mitigation = ITS_MITIGATION_VMEXIT_ONLY; + goto out; + } + fallthrough; + case ITS_CMD_ON: + its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS; + if (!boot_cpu_has(X86_FEATURE_RETPOLINE)) + setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS); + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + set_return_thunk(its_return_thunk); + break; + case ITS_CMD_RSB_STUFF: + its_mitigation = ITS_MITIGATION_RETPOLINE_STUFF; + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + set_return_thunk(call_depth_return_thunk); + if (retbleed_mitigation == RETBLEED_MITIGATION_NONE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + pr_info("Retbleed mitigation updated to stuffing\n"); + } + break; + } +out: + pr_info("%s\n", its_strings[its_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt @@ -1592,51 +1740,54 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) rrsba_disabled = true; } -static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode) { /* - * Similar to context switches, there are two types of RSB attacks - * after VM exit: + * WARNING! There are many subtleties to consider when changing *any* + * code related to RSB-related mitigations. 
Before doing so, carefully + * read the following document, and update if necessary: * - * 1) RSB underflow + * Documentation/admin-guide/hw-vuln/rsb.rst * - * 2) Poisoned RSB entry + * In an overly simplified nutshell: * - * When retpoline is enabled, both are mitigated by filling/clearing - * the RSB. + * - User->user RSB attacks are conditionally mitigated during + * context switches by cond_mitigation -> write_ibpb(). * - * When IBRS is enabled, while #1 would be mitigated by the IBRS branch - * prediction isolation protections, RSB still needs to be cleared - * because of #2. Note that SMEP provides no protection here, unlike - * user-space-poisoned RSB entries. + * - User->kernel and guest->host attacks are mitigated by eIBRS or + * RSB filling. * - * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB - * bug is present then a LITE version of RSB protection is required, - * just a single call needs to retire before a RET is executed. + * Though, depending on config, note that other alternative + * mitigations may end up getting used instead, e.g., IBPB on + * entry/vmexit, call depth tracking, or return thunks. 
*/ + switch (mode) { case SPECTRE_V2_NONE: - return; + break; - case SPECTRE_V2_EIBRS_LFENCE: case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS_RETPOLINE: if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { - setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); } - return; + break; - case SPECTRE_V2_EIBRS_RETPOLINE: case SPECTRE_V2_RETPOLINE: case SPECTRE_V2_LFENCE: case SPECTRE_V2_IBRS: + pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n"); + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); - pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); - return; - } + break; - pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); - dump_stack(); + default: + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n"); + dump_stack(); + break; + } } /* @@ -1695,11 +1846,11 @@ static void __init bhi_select_mitigation(void) return; } - /* Mitigate in hardware if supported */ - if (spec_ctrl_bhi_dis()) + if (!IS_ENABLED(CONFIG_X86_64)) return; - if (!IS_ENABLED(CONFIG_X86_64)) + /* Mitigate in hardware if supported */ + if (spec_ctrl_bhi_dis()) return; if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) { @@ -1830,48 +1981,7 @@ static void __init spectre_v2_select_mitigation(void) spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); - /* - * If Spectre v2 protection has been enabled, fill the RSB during a - * context switch. In general there are two types of RSB attacks - * across context switches, for which the CALLs/RETs may be unbalanced. - * - * 1) RSB underflow - * - * Some Intel parts have "bottomless RSB". When the RSB is empty, - * speculated return targets may come from the branch predictor, - * which could have a user-poisoned BTB or BHB entry. 
- * - * AMD has it even worse: *all* returns are speculated from the BTB, - * regardless of the state of the RSB. - * - * When IBRS or eIBRS is enabled, the "user -> kernel" attack - * scenario is mitigated by the IBRS branch prediction isolation - * properties, so the RSB buffer filling wouldn't be necessary to - * protect against this type of attack. - * - * The "user -> user" attack scenario is mitigated by RSB filling. - * - * 2) Poisoned RSB entry - * - * If the 'next' in-kernel return stack is shorter than 'prev', - * 'next' could be tricked into speculating with a user-poisoned RSB - * entry. - * - * The "user -> kernel" attack scenario is mitigated by SMEP and - * eIBRS. - * - * The "user -> user" scenario, also known as SpectreBHB, requires - * RSB clearing. - * - * So to mitigate all cases, unconditionally fill RSB on context - * switches. - * - * FIXME: Is this pointless for retbleed-affected AMD? - */ - setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); - pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - - spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + spectre_v2_select_rsb_mitigation(mode); /* * Retpoline protects the kernel, but doesn't protect firmware. IBRS @@ -2646,10 +2756,10 @@ static void __init srso_select_mitigation(void) if (boot_cpu_data.x86 == 0x19) { setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); - x86_return_thunk = srso_alias_return_thunk; + set_return_thunk(srso_alias_return_thunk); } else { setup_force_cpu_cap(X86_FEATURE_SRSO); - x86_return_thunk = srso_return_thunk; + set_return_thunk(srso_return_thunk); } if (has_microcode) srso_mitigation = SRSO_MITIGATION_SAFE_RET; @@ -2676,7 +2786,7 @@ static void __init srso_select_mitigation(void) setup_clear_cpu_cap(X86_FEATURE_RETHUNK); /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. 
*/ @@ -2701,7 +2811,7 @@ static void __init srso_select_mitigation(void) srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; /* - * There is no need for RSB filling: entry_ibpb() ensures + * There is no need for RSB filling: write_ibpb() ensures * all predictions, including the RSB, are invalidated, * regardless of IBPB implementation. */ @@ -2839,6 +2949,11 @@ static ssize_t rfds_show_state(char *buf) return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]); } +static ssize_t its_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && @@ -3021,6 +3136,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RFDS: return rfds_show_state(buf); + case X86_BUG_ITS: + return its_show_state(buf); + default: break; } @@ -3100,6 +3218,11 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib { return cpu_show_common(dev, attr, buf, X86_BUG_RFDS); } + +ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITS); +} #endif void __warn_thunk(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12126adbc3a9a7..5de4a879232a6c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1005,17 +1005,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* AMD-defined flags: level 0x80000001 */ + /* + * Check if extended CPUID leaves are implemented: Max extended + * CPUID leaf must be in the 0x80000001-0x8000ffff range. + */ eax = cpuid_eax(0x80000000); - c->extended_cpuid_level = eax; + c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? 
eax : 0; - if ((eax & 0xffff0000) == 0x80000000) { - if (eax >= 0x80000001) { - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (c->extended_cpuid_level >= 0x80000001) { + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_8000_0001_ECX] = ecx; - c->x86_capability[CPUID_8000_0001_EDX] = edx; - } + c->x86_capability[CPUID_8000_0001_ECX] = ecx; + c->x86_capability[CPUID_8000_0001_EDX] = edx; } if (c->extended_cpuid_level >= 0x80000007) { @@ -1227,6 +1228,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define GDS BIT(6) /* CPU is affected by Register File Data Sampling */ #define RFDS BIT(7) +/* CPU is affected by Indirect Target Selection */ +#define ITS BIT(8) +/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */ +#define ITS_NATIVE_ONLY BIT(9) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_IVYBRIDGE, X86_STEP_MAX, SRBDS), @@ -1238,22 +1243,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPS(INTEL_BROADWELL_G, X86_STEP_MAX, SRBDS), VULNBL_INTEL_STEPS(INTEL_BROADWELL_X, X86_STEP_MAX, MMIO), VULNBL_INTEL_STEPS(INTEL_BROADWELL, X86_STEP_MAX, SRBDS), - VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, 0x5, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_SKYLAKE_X, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPS(INTEL_SKYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), - VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, 0xb, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE_L, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, 0xc, 
MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPS(INTEL_KABYLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | SRBDS | ITS), VULNBL_INTEL_STEPS(INTEL_CANNONLAKE_L, X86_STEP_MAX, RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS), - VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_D, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_ICELAKE_X, X86_STEP_MAX, MMIO | GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, 0x0, MMIO | RETBLEED | ITS), + VULNBL_INTEL_STEPS(INTEL_COMETLAKE_L, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE_L, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), + VULNBL_INTEL_STEPS(INTEL_TIGERLAKE, X86_STEP_MAX, GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_STEPS(INTEL_LAKEFIELD, X86_STEP_MAX, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPS(INTEL_ROCKETLAKE, X86_STEP_MAX, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY), VULNBL_INTEL_TYPE(INTEL_ALDERLAKE, ATOM, RFDS), VULNBL_INTEL_STEPS(INTEL_ALDERLAKE_L, X86_STEP_MAX, RFDS), VULNBL_INTEL_TYPE(INTEL_RAPTORLAKE, ATOM, RFDS), @@ -1318,6 +1326,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) return 
cpu_matches(cpu_vuln_blacklist, RFDS); } +static bool __init vulnerable_to_its(u64 x86_arch_cap_msr) +{ + /* The "immunity" bit trumps everything else: */ + if (x86_arch_cap_msr & ARCH_CAP_ITS_NO) + return false; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + /* None of the affected CPUs have BHI_CTRL */ + if (boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + /* + * If a VMM did not expose ITS_NO, assume that a guest could + * be running on a vulnerable hardware or may migrate to such + * hardware. + */ + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; + + if (cpu_matches(cpu_vuln_blacklist, ITS)) + return true; + + return false; +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); @@ -1439,9 +1473,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); - /* When virtualized, eIBRS could be hidden, assume vulnerable */ - if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && - !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + /* + * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with + * BHI_NO still need to use the BHI mitigation to prevent Intra-mode + * attacks. When virtualized, eIBRS could be hidden, assume vulnerable. 
+ */ + if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) && (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || boot_cpu_has(X86_FEATURE_HYPERVISOR))) setup_force_cpu_bug(X86_BUG_BHI); @@ -1449,6 +1486,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET)) setup_force_cpu_bug(X86_BUG_IBPB_NO_RET); + if (vulnerable_to_its(x86_arch_cap_msr)) { + setup_force_cpu_bug(X86_BUG_ITS); + if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY)) + setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY); + } + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b61028cf5c8a3b..96cb992d50ef55 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -199,6 +199,12 @@ static bool need_sha_check(u32 cur_rev) case 0xa70c0: return cur_rev <= 0xa70C009; break; case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; + case 0xb0021: return cur_rev <= 0xb002146; break; + case 0xb1010: return cur_rev <= 0xb101046; break; + case 0xb2040: return cur_rev <= 0xb204031; break; + case 0xb4040: return cur_rev <= 0xb404031; break; + case 0xb6000: return cur_rev <= 0xb600031; break; + case 0xb7000: return cur_rev <= 0xb700031; break; default: break; } @@ -214,8 +220,7 @@ static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsi struct sha256_state s; int i; - if (x86_family(bsp_cpuid_1_eax) < 0x17 || - x86_family(bsp_cpuid_1_eax) > 0x19) + if (x86_family(bsp_cpuid_1_eax) < 0x17) return true; if (!need_sha_check(cur_rev)) @@ -1093,15 +1098,17 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz static int __init save_microcode_in_initrd(void) { - unsigned int cpuid_1_eax = native_cpuid_eax(1); struct cpuinfo_x86 *c = &boot_cpu_data; struct cont_desc desc = { 0 }; + unsigned int cpuid_1_eax; enum ucode_state 
ret; struct cpio_data cp; - if (dis_ucode_ldr || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) + if (microcode_loader_disabled() || c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) return 0; + cpuid_1_eax = native_cpuid_eax(1); + if (!find_blobs_in_containers(&cp)) return -EINVAL; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3658d11e7b692..e8021d3e58824a 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,8 @@ #include "internal.h" -static struct microcode_ops *microcode_ops; -bool dis_ucode_ldr = true; +static struct microcode_ops *microcode_ops; +static bool dis_ucode_ldr = false; bool force_minrev = IS_ENABLED(CONFIG_MICROCODE_LATE_FORCE_MINREV); module_param(force_minrev, bool, S_IRUSR | S_IWUSR); @@ -84,6 +84,9 @@ static bool amd_check_current_patch_level(void) u32 lvl, dummy, i; u32 *levels; + if (x86_cpuid_vendor() != X86_VENDOR_AMD) + return false; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); levels = final_levels; @@ -95,27 +98,29 @@ static bool amd_check_current_patch_level(void) return false; } -static bool __init check_loader_disabled_bsp(void) +bool __init microcode_loader_disabled(void) { - static const char *__dis_opt_str = "dis_ucode_ldr"; - const char *cmdline = boot_command_line; - const char *option = __dis_opt_str; + if (dis_ucode_ldr) + return true; /* - * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not - * completely accurate as xen pv guests don't see that CPUID bit set but - * that's good enough as they don't land on the BSP path anyway. + * Disable when: + * + * 1) The CPU does not support CPUID. + * + * 2) Bit 31 in CPUID[1]:ECX is clear + * The bit is reserved for hypervisor use. This is still not + * completely accurate as XEN PV guests don't see that CPUID bit + * set, but that's good enough as they don't land on the BSP + * path anyway. 
+ * + * 3) Certain AMD patch levels are not allowed to be + * overwritten. */ - if (native_cpuid_ecx(1) & BIT(31)) - return true; - - if (x86_cpuid_vendor() == X86_VENDOR_AMD) { - if (amd_check_current_patch_level()) - return true; - } - - if (cmdline_find_option_bool(cmdline, option) <= 0) - dis_ucode_ldr = false; + if (!have_cpuid_p() || + native_cpuid_ecx(1) & BIT(31) || + amd_check_current_patch_level()) + dis_ucode_ldr = true; return dis_ucode_ldr; } @@ -125,7 +130,10 @@ void __init load_ucode_bsp(void) unsigned int cpuid_1_eax; bool intel = true; - if (!have_cpuid_p()) + if (cmdline_find_option_bool(boot_command_line, "dis_ucode_ldr") > 0) + dis_ucode_ldr = true; + + if (microcode_loader_disabled()) return; cpuid_1_eax = native_cpuid_eax(1); @@ -146,9 +154,6 @@ void __init load_ucode_bsp(void) return; } - if (check_loader_disabled_bsp()) - return; - if (intel) load_ucode_intel_bsp(&early_data); else @@ -159,6 +164,11 @@ void load_ucode_ap(void) { unsigned int cpuid_1_eax; + /* + * Can't use microcode_loader_disabled() here - .init section + * hell. It doesn't have to either - the BSP variant must've + * parsed cmdline already anyway. 
+ */ if (dis_ucode_ldr) return; @@ -686,6 +696,8 @@ static int load_late_locked(void) return load_late_stop_cpus(true); case UCODE_NFOUND: return -ENOENT; + case UCODE_OK: + return 0; default: return -EBADFD; } @@ -810,7 +822,7 @@ static int __init microcode_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; int error; - if (dis_ucode_ldr) + if (microcode_loader_disabled()) return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 819199bc0119b2..2a397da43923ba 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -389,7 +389,7 @@ static int __init save_builtin_microcode(void) if (xchg(&ucode_patch_va, NULL) != UCODE_BSP_LOADED) return 0; - if (dis_ucode_ldr || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + if (microcode_loader_disabled() || boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return 0; uci.mc = get_microcode_blob(&uci, true); diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index 5df621752fefac..50a9702ae4e2b5 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -94,7 +94,6 @@ static inline unsigned int x86_cpuid_family(void) return x86_family(eax); } -extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e2c6b471d2302a..8c18327eb10bbb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -593,7 +593,7 @@ static void get_fixed_ranges(mtrr_type *frs) void mtrr_save_fixed_ranges(void *info) { - if (boot_cpu_has(X86_FEATURE_MTRR)) + if (mtrr_state.have_fixed) get_fixed_ranges(mtrr_state.fixed_ranges); } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 93ec829015f134..cc4a54145c83d8 100644 --- 
a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -3553,6 +3553,22 @@ static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp) free_rmid(rgrp->closid, rgrp->mon.rmid); } +/* + * We allow creating mon groups only within a directory called "mon_groups" + * which is present in every ctrl_mon group. Check if this is a valid + * "mon_groups" directory. + * + * 1. The directory should be named "mon_groups". + * 2. The mon group itself should "not" be named "mon_groups". + * This makes sure "mon_groups" directory always has a ctrl_mon group + * as parent. + */ +static bool is_mon_groups(struct kernfs_node *kn, const char *name) +{ + return (!strcmp(rdt_kn_name(kn), "mon_groups") && + strcmp(name, "mon_groups")); +} + static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, const char *name, umode_t mode, enum rdt_group_type rtype, struct rdtgroup **r) @@ -3568,6 +3584,15 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_unlock; } + /* + * Check that the parent directory for a monitor group is a "mon_groups" + * directory. + */ + if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) { + ret = -EPERM; + goto out_unlock; + } + if (rtype == RDTMON_GROUP && (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { @@ -3751,22 +3776,6 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, return ret; } -/* - * We allow creating mon groups only with in a directory called "mon_groups" - * which is present in every ctrl_mon group. Check if this is a valid - * "mon_groups" directory. - * - * 1. The directory should be named "mon_groups". - * 2. The mon group itself should "not" be named "mon_groups". - * This makes sure "mon_groups" directory always has a ctrl_mon group - * as parent.
- */ -static bool is_mon_groups(struct kernfs_node *kn, const char *name) -{ - return (!strcmp(rdt_kn_name(kn), "mon_groups") && - strcmp(name, "mon_groups")); -} - static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { @@ -3782,11 +3791,8 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn) return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode); - /* - * If RDT monitoring is supported and the parent directory is a valid - * "mon_groups" directory, add a monitoring subdirectory. - */ - if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name)) + /* Else, attempt to add a monitoring subdirectory. */ + if (resctrl_arch_mon_capable()) return rdtgroup_mkdir_mon(parent_kn, name, mode); return -EPERM; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 57120f0749cc3c..9920122018a0b3 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) void __init e820__register_nosave_regions(unsigned long limit_pfn) { int i; - unsigned long pfn = 0; + u64 last_addr = 0; for (i = 0; i < e820_table->nr_entries; i++) { struct e820_entry *entry = &e820_table->entries[i]; - if (pfn < PFN_UP(entry->addr)) - register_nosave_region(pfn, PFN_UP(entry->addr)); - - pfn = PFN_DOWN(entry->addr + entry->size); - if (entry->type != E820_TYPE_RAM) - register_nosave_region(PFN_UP(entry->addr), pfn); + continue; - if (pfn >= limit_pfn) - break; + if (last_addr < entry->addr) + register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr)); + + last_addr = entry->addr + entry->size; } + + register_nosave_region(PFN_DOWN(last_addr), limit_pfn); } #ifdef CONFIG_ACPI @@ -1300,6 +1299,14 @@ void __init e820__memblock_setup(void) memblock_add(entry->addr, entry->size); } + /* + * 32-bit systems are limited to 4GB of memory even with HIGHMEM and + * to even
less without it. + * Discard memory after max_pfn - the actual limit detected at runtime. + */ + if (IS_ENABLED(CONFIG_X86_32)) + memblock_remove(PFN_PHYS(max_pfn), -1); + /* Throw away partial pages: */ memblock_trim_memory(PAGE_SIZE); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 611f27e3890c28..3aad78bfcb267f 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -389,10 +389,10 @@ static int __init setup_early_printk(char *buf) keep = (strstr(buf, "keep") != NULL); while (*buf != '\0') { - if (!strncmp(buf, "mmio", 4)) { - early_mmio_serial_init(buf + 4); + if (!strncmp(buf, "mmio32", 6)) { + buf += 6; + early_mmio_serial_init(buf); early_console_register(&early_serial_console, keep); - buf += 4; } if (!strncmp(buf, "serial", 6)) { buf += 6; @@ -407,9 +407,9 @@ static int __init setup_early_printk(char *buf) } #ifdef CONFIG_PCI if (!strncmp(buf, "pciserial", 9)) { - early_pci_serial_init(buf + 9); + buf += 9; /* Keep from match the above "pciserial" */ + early_pci_serial_init(buf); early_console_register(&early_serial_console, keep); - buf += 9; /* Keep from match the above "serial" */ } #endif if (!strncmp(buf, "vga", 3) && diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cace6e8d7cc77a..5eb1514af5593e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -354,7 +354,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(ip)) __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index de001b2146abf3..375f2d7f1762d4 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -145,10 +145,6 @@ void __init __no_stack_protector mk_early_pgtbl_32(void) *ptr = (unsigned long)ptep + PAGE_OFFSET; 
#ifdef CONFIG_MICROCODE_INITRD32 - /* Running on a hypervisor? */ - if (native_cpuid_ecx(1) & BIT(31)) - return; - params = (struct boot_params *)__pa_nodebug(&boot_params); if (!params->hdr.ramdisk_size || !params->hdr.ramdisk_image) return; diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 80e262bb627fe1..cb9852ad609893 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -46,7 +46,8 @@ bool __init pit_timer_init(void) * VMMs otherwise steal CPU time just to pointlessly waggle * the (masked) IRQ. */ - clockevent_i8253_disable(); + scoped_guard(irq) + clockevent_i8253_disable(); return false; } clockevent_i8253_init(true); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 6290dd120f5e45..ff40f09ad9116c 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } -static void task_update_io_bitmap(struct task_struct *tsk) +static void task_update_io_bitmap(void) { + struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; if (t->iopl_emul == 3 || t->io_bitmap) { @@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk) struct io_bitmap *iobm = tsk->thread.io_bitmap; tsk->thread.io_bitmap = NULL; - task_update_io_bitmap(tsk); + /* + * Don't touch the TSS when invoked on a failed fork(). TSS + * reflects the state of @current and not the state of @tsk. 
+ */ + if (tsk == current) + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) } t->iopl_emul = level; - task_update_io_bitmap(current); - + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 81f9b78e0f7baa..6cd5d2d6c58af6 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -419,7 +419,7 @@ static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs) bool handled = false; for (i = 0; i < 4; i++) - pir_copy[i] = pir[i]; + pir_copy[i] = READ_ONCE(pir[i]); for (i = 0; i < 4; i++) { if (!pir_copy[i]) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 80265162aefff9..1f325304c4a842 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -42,7 +42,7 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { - free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)image->arch.pgd, pgd_allocation_order()); image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); @@ -59,7 +59,7 @@ static void machine_kexec_free_page_tables(struct kimage *image) static int machine_kexec_alloc_page_tables(struct kimage *image) { image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PGD_ALLOCATION_ORDER); + pgd_allocation_order()); #ifdef CONFIG_X86_PAE image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index a7998f35170175..ff07558b7ebc65 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -266,6 +266,8 @@ int module_finalize(const Elf_Ehdr *hdr, ibt_endbr = s; } + its_init_mod(me); + if (retpolines || cfi) { void *rseg = NULL, *cseg = NULL; unsigned int rsize 
= 0, csize = 0; @@ -286,6 +288,9 @@ int module_finalize(const Elf_Ehdr *hdr, void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); } + + its_fini_mod(me); + if (returns) { void *rseg = (void *)returns->sh_addr; apply_returns(rseg, rseg + returns->sh_size); @@ -326,4 +331,5 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); + its_free_mod(mod); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 962c3ce39323e7..4940fcd409251c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -181,6 +181,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + clear_tsk_thread_flag(p, TIF_IO_BITMAP); p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -469,6 +470,11 @@ void native_tss_update_io_bitmap(void) } else { struct io_bitmap *iobm = t->io_bitmap; + if (WARN_ON_ONCE(!iobm)) { + clear_thread_flag(TIF_IO_BITMAP); + native_tss_invalidate_io_bitmap(); + } + /* * Only copy bitmap data when the sequence number differs. The * update time is accounted to the incoming task. 
@@ -907,13 +913,10 @@ static __init bool prefer_mwait_c1_over_halt(void) static __cpuidle void mwait_idle(void) { if (!current_set_polling_and_test()) { - if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { - mb(); /* quirk */ - clflush((void *)&current_thread_info()->flags); - mb(); /* quirk */ - } + const void *addr = &current_thread_info()->flags; - __monitor((void *)&current_thread_info()->flags, 0, 0); + alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr)); + __monitor(addr, 0, 0); if (!need_resched()) { __sti_mwait(0, 0); raw_local_irq_disable(); diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506c6..42bbc42bd3503c 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901b7..d483b585c6c604 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/smpboot.c
b/arch/x86/kernel/smpboot.c index d6cf1e23c2a326..96355ab9aed953 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1238,10 +1238,6 @@ void play_dead_common(void) local_irq_disable(); } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1287,6 +1283,50 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ +static inline void mwait_play_dead_cpuid_hint(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + return; + if (!this_cpu_has(X86_FEATURE_MWAIT)) + return; + if (!this_cpu_has(X86_FEATURE_CLFLUSH)) + return; + + eax = CPUID_LEAF_MWAIT; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + mwait_play_dead(eax); +} + /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1337,9 +1377,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - /* Below returns only on error. 
*/ - cpuidle_play_dead(); - hlt_play_dead(); + mwait_play_dead_cpuid_hint(); + if (cpuidle_play_dead()) + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index a59c72e7764522..c3d7ff44b29adc 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -81,7 +81,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, break; case RET: - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk_at(insn)) code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; @@ -90,7 +90,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case JCC: if (!func) { func = __static_call_return; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) + if (cpu_wants_rethunk()) func = x86_return_thunk; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ccdc45e5b75961..cda5f8362e9da5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -466,10 +466,18 @@ SECTIONS } /* - * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + * COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause + * this. Let's assume that nobody will be running a COMPILE_TEST kernel and + * let's assert that fuller build coverage is more valuable than being able to + * run a COMPILE_TEST kernel. + */ +#ifndef CONFIG_COMPILE_TEST +/* + * The ASSERT() sync to . is intentional, for binutils 2.14 compatibility: */ . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); +#endif /* needed for Clang - see arch/x86/entry/entry.S */ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); @@ -497,6 +505,16 @@ PROVIDE(__ref_stack_chk_guard = __stack_chk_guard); "SRSO function pair won't alias"); #endif +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. 
= ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline"); +. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart"); +. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); +#endif + +#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) +. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline"); +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5e4d4934c0d3c2..571c906ffcbfe2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1427,8 +1427,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xa: { /* Architectural Performance Monitoring */ - union cpuid10_eax eax; - union cpuid10_edx edx; + union cpuid10_eax eax = { }; + union cpuid10_edx edx = { }; if (!enable_pmu || !static_cpu_has(X86_FEATURE_ARCH_PERFMON)) { entry->eax = entry->ebx = entry->ecx = entry->edx = 0; @@ -1444,8 +1444,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) if (kvm_pmu_cap.version) edx.split.anythread_deprecated = 1; - edx.split.reserved1 = 0; - edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = kvm_pmu_cap.events_mask; @@ -1763,7 +1761,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* AMD Extended Performance Monitoring and Debug */ case 0x80000022: { - union cpuid_0x80000022_ebx ebx; + union cpuid_0x80000022_ebx ebx = { }; entry->ecx = entry->edx = 0; if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 050a0e229a4d17..f2b36d32ef40e5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -104,6 +104,9 @@ void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, static inline int 
kvm_mmu_reload(struct kvm_vcpu *vcpu) { + if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) + kvm_mmu_free_obsolete_roots(vcpu); + /* * Checking root.hpa is sufficient even when KVM has mirror root. * We can have either: diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 63bb77ee1bb16b..8d1b632e33d288 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5974,6 +5974,7 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu) __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu); __kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu); } +EXPORT_SYMBOL_GPL(kvm_mmu_free_obsolete_roots); static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, int *bytes) @@ -7669,9 +7670,30 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm) } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; +} + +static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; +} + +static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, + int level) +{ + lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; +} + bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range) { + struct kvm_memory_slot *slot = range->slot; + int level; + /* * Zap SPTEs even if the slot can't be mapped PRIVATE. KVM x86 only * supports KVM_MEMORY_ATTRIBUTE_PRIVATE, and so it *seems* like KVM @@ -7686,6 +7708,38 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm))) return false; + if (WARN_ON_ONCE(range->end <= range->start)) + return false; + + /* + * If the head and tail pages of the range currently allow a hugepage, + * i.e. 
reside fully in the slot and don't have mixed attributes, then + * add each corresponding hugepage range to the ongoing invalidation, + * e.g. to prevent KVM from creating a hugepage in response to a fault + * for a gfn whose attributes aren't changing. Note, only the range + * of gfns whose attributes are being modified needs to be explicitly + * unmapped, as that will unmap any existing hugepages. + */ + for (level = PG_LEVEL_2M; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) { + gfn_t start = gfn_round_for_level(range->start, level); + gfn_t end = gfn_round_for_level(range->end - 1, level); + gfn_t nr_pages = KVM_PAGES_PER_HPAGE(level); + + if ((start != range->start || start + nr_pages > range->end) && + start >= slot->base_gfn && + start + nr_pages <= slot->base_gfn + slot->npages && + !hugepage_test_mixed(slot, start, level)) + kvm_mmu_invalidate_range_add(kvm, start, start + nr_pages); + + if (end == start) + continue; + + if ((end + nr_pages) > range->end && + (end + nr_pages) <= (slot->base_gfn + slot->npages) && + !hugepage_test_mixed(slot, end, level)) + kvm_mmu_invalidate_range_add(kvm, end, end + nr_pages); + } + /* Unmap the old attribute page. 
*/ if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE) range->attr_filter = KVM_FILTER_SHARED; @@ -7695,23 +7749,7 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, return kvm_unmap_gfn_range(kvm, range); } -static bool hugepage_test_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - return lpage_info_slot(gfn, slot, level)->disallow_lpage & KVM_LPAGE_MIXED_FLAG; -} - -static void hugepage_clear_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - lpage_info_slot(gfn, slot, level)->disallow_lpage &= ~KVM_LPAGE_MIXED_FLAG; -} -static void hugepage_set_mixed(struct kvm_memory_slot *slot, gfn_t gfn, - int level) -{ - lpage_info_slot(gfn, slot, level)->disallow_lpage |= KVM_LPAGE_MIXED_FLAG; -} static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long attrs) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7cc0564f5f97e5..21a3b816624239 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -40,7 +40,9 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) kvm_tdp_mmu_invalidate_roots(kvm, KVM_VALID_ROOTS); kvm_tdp_mmu_zap_invalidated_roots(kvm, false); - WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); +#ifdef CONFIG_KVM_PROVE_MMU + KVM_MMU_WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); +#endif WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -325,13 +327,17 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, +1); +#ifdef CONFIG_KVM_PROVE_MMU atomic64_inc(&kvm->arch.tdp_mmu_pages); +#endif } static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, -1); +#ifdef CONFIG_KVM_PROVE_MMU atomic64_dec(&kvm->arch.tdp_mmu_pages); +#endif } /** diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c index 
699e551ec93bab..9864c057187d8a 100644 --- a/arch/x86/kvm/smm.c +++ b/arch/x86/kvm/smm.c @@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) kvm_mmu_reset_context(vcpu); } +EXPORT_SYMBOL_GPL(kvm_smm_changed); void process_smi(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 65fd245a9953ce..7338879d1c0c48 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) struct amd_svm_iommu_ir *ir; u64 entry; + if (WARN_ON_ONCE(!pi->ir_data)) + return -EINVAL; + /** * In some cases, the existing irte is updated and re-set, * so we need to check here if it's already been * added * to the ir_list. */ - if (pi->ir_data && (pi->prev_ga_tag != 0)) { + if (pi->prev_ga_tag) { struct kvm *kvm = svm->vcpu.kvm; u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); @@ -820,7 +823,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. 
*/ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,10 +899,10 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) return 0; pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", @@ -933,6 +936,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +956,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. 
- */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +971,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0bc708ee278877..a7a7dc5073363b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3173,9 +3173,14 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) kvfree(svm->sev_es.ghcb_sa); } +static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) +{ + return (((u64)control->exit_code_hi) << 32) | control->exit_code; +} + static void dump_ghcb(struct vcpu_svm *svm) { - struct ghcb *ghcb = svm->sev_es.ghcb; + struct vmcb_control_area *control = &svm->vmcb->control; unsigned int nbits; /* Re-use the dump_invalid_vmcb module parameter */ @@ -3184,18 +3189,24 @@ static void dump_ghcb(struct vcpu_svm *svm) return; } - nbits = sizeof(ghcb->save.valid_bitmap) * 8; + nbits = sizeof(svm->sev_es.valid_bitmap) * 8; - pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); + /* + * Print KVM's 
snapshot of the GHCB values that were (unsuccessfully) + * used to handle the exit. If the guest has since modified the GHCB + * itself, dumping the raw GHCB won't help debug why KVM was unable to + * handle the VMGEXIT that KVM observed. + */ + pr_err("GHCB (GPA=%016llx) snapshot:\n", svm->vmcb->control.ghcb_gpa); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", - ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); + kvm_ghcb_get_sw_exit_code(control), kvm_ghcb_sw_exit_code_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", - ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); + control->exit_info_1, kvm_ghcb_sw_exit_info_1_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", - ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); + control->exit_info_2, kvm_ghcb_sw_exit_info_2_is_valid(svm)); pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", - ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); - pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); + svm->sev_es.sw_scratch, kvm_ghcb_sw_scratch_is_valid(svm)); + pr_err("%-20s%*pb\n", "valid_bitmap", nbits, svm->sev_es.valid_bitmap); } static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) @@ -3266,11 +3277,6 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) -{ - return (((u64)control->exit_code_hi) << 32) | control->exit_code; -} - static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d5d0c5c3300bc1..a89c271a1951f4 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -607,9 +607,6 @@ static void svm_disable_virtualization_cpu(void) kvm_cpu_svm_disable(); amd_pmu_disable_virt(); - - if 
(cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); } static int svm_enable_virtualization_cpu(void) @@ -687,9 +684,6 @@ static int svm_enable_virtualization_cpu(void) rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi); } - if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) - msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); - return 0; } @@ -1518,6 +1512,63 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +#ifdef CONFIG_CPU_MITIGATIONS +static DEFINE_SPINLOCK(srso_lock); +static atomic_t srso_nr_vms; + +static void svm_srso_clear_bp_spec_reduce(void *ign) +{ + struct svm_cpu_data *sd = this_cpu_ptr(&svm_data); + + if (!sd->bp_spec_reduce_set) + return; + + msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + sd->bp_spec_reduce_set = false; +} + +static void svm_srso_vm_destroy(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + if (atomic_dec_return(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + /* + * Verify a new VM didn't come along, acquire the lock, and increment + * the count before this task acquired the lock. + */ + if (atomic_read(&srso_nr_vms)) + return; + + on_each_cpu(svm_srso_clear_bp_spec_reduce, NULL, 1); +} + +static void svm_srso_vm_init(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE)) + return; + + /* + * Acquire the lock on 0 => 1 transitions to ensure a potential 1 => 0 + * transition, i.e. destroying the last VM, is fully complete, e.g. so + * that a delayed IPI doesn't clear BP_SPEC_REDUCE after a vCPU runs. 
+ */ + if (atomic_inc_not_zero(&srso_nr_vms)) + return; + + guard(spinlock)(&srso_lock); + + atomic_inc(&srso_nr_vms); +} +#else +static void svm_srso_vm_init(void) { } +static void svm_srso_vm_destroy(void) { } +#endif + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1550,6 +1601,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); + if (cpu_feature_enabled(X86_FEATURE_SRSO_BP_SPEC_REDUCE) && + !sd->bp_spec_reduce_set) { + sd->bp_spec_reduce_set = true; + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + } svm->guest_state_loaded = true; } @@ -2231,6 +2287,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu) */ if (!sev_es_guest(vcpu->kvm)) { clear_page(svm->vmcb); +#ifdef CONFIG_KVM_SMM + if (is_smm(vcpu)) + kvm_smm_changed(vcpu, false); +#endif kvm_vcpu_reset(vcpu, true); } @@ -5036,6 +5096,8 @@ static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); sev_vm_destroy(kvm); + + svm_srso_vm_destroy(); } static int svm_vm_init(struct kvm *kvm) @@ -5061,6 +5123,7 @@ static int svm_vm_init(struct kvm *kvm) return ret; } + svm_srso_vm_init(); return 0; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index d4490eaed55dd4..f16b068c4228b8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -335,6 +335,8 @@ struct svm_cpu_data { u32 next_asid; u32 min_asid; + bool bp_spec_reduce_set; + struct vmcb *save_area; unsigned long save_area_pa; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ccda95e53f626d..ba736cbb0587cd 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -11,6 +11,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#ifdef CREATE_TRACE_POINTS +#define tracing_kvm_rip_read(vcpu) ({ \ + typeof(vcpu) __vcpu = vcpu; \ + __vcpu->arch.guest_state_protected ? 
0 : kvm_rip_read(__vcpu); \ + }) +#endif + /* * Tracepoint for guest mode entry. */ @@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->rip = kvm_rip_read(vcpu); + __entry->rip = tracing_kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, @@ -319,7 +326,7 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->guest_rip = kvm_rip_read(vcpu); \ + __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ @@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index ec08fa3caf43ce..d70e5b90087d86 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu); */ static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock); +#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING + static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { return &(to_vmx(vcpu)->pi_desc); @@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) * current pCPU if the task was migrated. */ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) { - raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu); + + /* + * In addition to taking the wakeup lock for the regular/IRQ + * context, tell lockdep it is being taken for the "sched out" + * context as well. vCPU loads happens in task context, and + * this is taking the lock of the *previous* CPU, i.e. 
can race + * with both the scheduler and the wakeup handler. + */ + raw_spin_lock(spinlock); + spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_); list_del(&vmx->pi_wakeup_list); - raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + spin_release(&spinlock->dep_map, _RET_IP_); + raw_spin_unlock(spinlock); } dest = cpu_physical_id(cpu); @@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct pi_desc old, new; - unsigned long flags; - local_irq_save(flags); + lockdep_assert_irqs_disabled(); - raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + /* + * Acquire the wakeup lock using the "sched out" context to workaround + * a lockdep false positive. When this is called, schedule() holds + * various per-CPU scheduler locks. When the wakeup handler runs, it + * holds this CPU's wakeup lock while calling try_to_wake_up(), which + * can eventually take the aforementioned scheduler locks, which causes + * lockdep to assume there is deadlock. + * + * Deadlock can't actually occur because IRQs are disabled for the + * entirety of the sched_out critical section, i.e. the wakeup handler + * can't run while the scheduler locks are held. 
+ */ + raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu), + PI_LOCK_SCHED_OUT); list_add_tail(&vmx->pi_wakeup_list, &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu)); raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); @@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) */ if (pi_test_on(&new)) __apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR); - - local_irq_restore(flags); } static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu) @@ -274,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -312,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. 
- */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -334,11 +345,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + + enable_remapped_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -346,6 +358,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c841817a914a34..be7bb6d20129db 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1584,7 +1584,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc); ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1618,6 +1618,8 @@ static u64 kvm_get_arch_capabilities(void) data |= ARCH_CAP_MDS_NO; if (!boot_cpu_has_bug(X86_BUG_RFDS)) data |= ARCH_CAP_RFDS_NO; + if (!boot_cpu_has_bug(X86_BUG_ITS)) + data |= ARCH_CAP_ITS_NO; if (!boot_cpu_has(X86_FEATURE_RTM)) { /* @@ -4597,7 +4599,7 @@ static bool kvm_is_vm_type_supported(unsigned long type) return type < 32 && (kvm_caps.supported_vm_types 
& BIT(type)); } -static inline u32 kvm_sync_valid_fields(struct kvm *kvm) +static inline u64 kvm_sync_valid_fields(struct kvm *kvm) { return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS; } @@ -11098,7 +11100,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * Profile KVM exit RIPs: */ - if (unlikely(prof_on == KVM_PROFILING)) { + if (unlikely(prof_on == KVM_PROFILING && + !vcpu->arch.guest_state_protected)) { unsigned long rip = kvm_rip_read(vcpu); profile_hit(KVM_PROFILING, (void *)rip); } @@ -11492,7 +11495,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_queued_exception *ex = &vcpu->arch.exception; struct kvm_run *kvm_run = vcpu->run; - u32 sync_valid_fields; + u64 sync_valid_fields; int r; r = kvm_mmu_post_init_vm(vcpu->kvm); @@ -11786,6 +11789,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, if (kvm_mpx_supported()) kvm_load_guest_fpu(vcpu); + kvm_vcpu_srcu_read_lock(vcpu); + r = kvm_apic_accept_events(vcpu); if (r < 0) goto out; @@ -11799,6 +11804,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state = vcpu->arch.mp_state; out: + kvm_vcpu_srcu_read_unlock(vcpu); + if (kvm_mpx_supported()) kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); @@ -13552,25 +13559,27 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_arch_has_irq_bypass(void) -{ - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + 
spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13580,9 +13589,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When producer of consumer is unregistered, we change back to @@ -13590,12 +13599,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } @@ -13608,7 +13623,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index a26c43abd47d8d..39374949daa2f6 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -367,6 +367,54 @@ SYM_FUNC_END(call_depth_return_thunk) #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ +#ifdef CONFIG_MITIGATION_ITS + +.macro ITS_THUNK reg + +/* + * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) + * that complete the fineibt_paranoid caller sequence. 
+ */ +1: .byte 0xea +SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + jne 1b +SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + ANNOTATE_RETPOLINE_SAFE + jmp *%\reg + int3 + .align 32, 0xcc /* fill to the end of the line */ + .skip 32 - (__x86_indirect_its_thunk_\reg - 1b), 0xcc /* skip to the next upper half */ +.endm + +/* ITS mitigation requires thunks be aligned to upper half of cacheline */ +.align 64, 0xcc +.skip 29, 0xcc + +#define GEN(reg) ITS_THUNK reg +#include +#undef GEN + + .align 64, 0xcc +SYM_FUNC_ALIAS(__x86_indirect_its_thunk_array, __x86_indirect_its_thunk_rax) +SYM_CODE_END(__x86_indirect_its_thunk_array) + +.align 64, 0xcc +.skip 32, 0xcc +SYM_CODE_START(its_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(its_return_thunk) +EXPORT_SYMBOL(its_return_thunk) + +#endif /* CONFIG_MITIGATION_ITS */ + /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688fc..cd3fd5155f6ece 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. 
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
-80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ad662cc4605c69..bb8d99e717b9e7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -565,7 +566,7 @@ static void __init lowmem_pfn_init(void) "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" #define MSG_HIGHMEM_TRIMMED \ - "Warning: only 4GB will be used. 
Support for for CONFIG_HIGHMEM64G was removed!\n" + "Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n" /* * We have more RAM than fits into lowmem - we try to put it into * highmem, also taking the highmem=x boot parameter into account: @@ -755,6 +756,8 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); + execmem_cache_make_ro(); + kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7c4f6f591f2b24..949a447f75ec7e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1391,6 +1392,8 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); + execmem_cache_make_ro(); + kernel_set_to_readonly = 1; /* diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a05fcddfc811af..f7ae44d3dd9e01 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -360,7 +360,7 @@ static inline pgd_t *_pgd_alloc(struct mm_struct *mm) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); /* * Now PAE kernel is not running as a Xen domain. 
We can allocate @@ -380,7 +380,7 @@ static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pgd_t *_pgd_alloc(struct mm_struct *mm) { - return __pgd_alloc(mm, PGD_ALLOCATION_ORDER); + return __pgd_alloc(mm, pgd_allocation_order()); } static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e459d97ef39772..b6d6750e4bd121 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -667,9 +667,9 @@ static void cond_mitigation(struct task_struct *next) prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec); /* - * Avoid user/user BTB poisoning by flushing the branch predictor - * when switching between processes. This stops one process from - * doing Spectre-v2 attacks on another. + * Avoid user->user BTB/RSB poisoning by flushing them when switching + * between processes. This stops one process from doing Spectre-v2 + * attacks on another. * * Both, the conditional and the always IBPB mode use the mm * pointer to avoid the IBPB when switching between tasks of the @@ -899,8 +899,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, cond_mitigation(tsk); /* - * Let nmi_uaccess_okay() and finish_asid_transition() - * know that CR3 is changing. + * Indicate that CR3 is about to change. nmi_uaccess_okay() + * and others are sensitive to the window where mm_cpumask(), + * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); barrier(); @@ -1204,8 +1205,16 @@ static void flush_tlb_func(void *info) static bool should_flush_tlb(int cpu, void *data) { + struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu); struct flush_tlb_info *info = data; + /* + * Order the 'loaded_mm' and 'is_lazy' against their + * write ordering in switch_mm_irqs_off(). Ensure + * 'is_lazy' is at least as new as 'loaded_mm'. + */ + smp_rmb(); + /* Lazy TLB will get flushed at the next context switch. 
*/ if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu)) return false; @@ -1214,8 +1223,15 @@ static bool should_flush_tlb(int cpu, void *data) if (!info->mm) return true; + /* + * While switching, the remote CPU could have state from + * either the prev or next mm. Assume the worst and flush. + */ + if (loaded_mm == LOADED_MM_SWITCHING) + return true; + /* The target mm is loaded, and the CPU is not lazy. */ - if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) + if (loaded_mm == info->mm) return true; /* In cpumask, but not the loaded mm? Periodically remove by flushing. */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9e5fe2ba858f08..ea4dd5b393aaf8 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -41,6 +41,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) +#define EMIT5(b1, b2, b3, b4, b5) \ + do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0) #define EMIT1_off32(b1, off) \ do { EMIT1(b1); EMIT(off, 4); } while (0) @@ -661,7 +663,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, its_static_thunk(reg), ip); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { @@ -683,7 +688,7 @@ static void emit_return(u8 **pprog, u8 *ip) { u8 *prog = *pprog; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (cpu_wants_rethunk()) { emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ @@ -1502,6 +1507,48 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu 
*priv_frame_ptr) #define PRIV_STACK_GUARD_SZ 8 #define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL +static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, + struct bpf_prog *bpf_prog) +{ + u8 *prog = *pprog; + u8 *func; + + if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) { + /* The clearing sequence clobbers eax and ecx. */ + EMIT1(0x50); /* push rax */ + EMIT1(0x51); /* push rcx */ + ip += 2; + + func = (u8 *)clear_bhb_loop; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + + if (emit_call(&prog, func, ip)) + return -EINVAL; + EMIT1(0x59); /* pop rcx */ + EMIT1(0x58); /* pop rax */ + } + /* Insert IBHF instruction */ + if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && + cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || + cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) { + /* + * Add an Indirect Branch History Fence (IBHF). IBHF acts as a + * fence preventing branch history from before the fence from + * affecting indirect branches after the fence. This is + * specifically used in cBPF jitted code to prevent Intra-mode + * BHI attacks. The IBHF instruction is designed to be a NOP on + * hardware that doesn't need or support it. The REP and REX.W + * prefixes are required by the microcode, and they also ensure + * that the NOP is unlikely to be used in existing code. + * + * IBHF is not a valid instruction in 32-bit mode. 
+ */ + EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ + } + *pprog = prog; + return 0; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { @@ -2544,6 +2591,13 @@ st: if (is_imm8(insn->off)) seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; + if (bpf_prog_was_classic(bpf_prog) && + !capable(CAP_SYS_ADMIN)) { + u8 *ip = image + addrs[i - 1]; + + if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) + return -EINVAL; + } if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index ddb798603201ef..7c20387d82029a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -723,12 +723,15 @@ int pci_ext_cfg_avail(void) return 0; } -#if IS_ENABLED(CONFIG_VMD) struct pci_dev *pci_real_dma_dev(struct pci_dev *dev) { +#if IS_ENABLED(CONFIG_VMD) if (is_vmd(dev->bus)) return to_pci_sysdata(dev->bus)->vmd_dev; +#endif + + if (is_nvme_remap(dev->bus)) + return to_pci_sysdata(dev->bus)->nvme_remap_dev; return dev; } -#endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac57259a432b8c..a4b4ebd41b8fab 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -73,7 +73,7 @@ int __init efi_alloc_page_tables(void) gfp_t gfp_mask; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order()); if (!efi_pgd) goto fail; @@ -96,7 +96,7 @@ int __init efi_alloc_page_tables(void) if (pgtable_l5_enabled()) free_page((unsigned long)pgd_page_vaddr(*pgd)); free_pgd: - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); + free_pages((unsigned long)efi_pgd, pgd_allocation_order()); fail: return -ENOMEM; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S 
index 8c534c36adfa28..66f066b8fedad2 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -26,7 +26,7 @@ /* code below belongs to the image kernel */ .align PAGE_SIZE SYM_FUNC_START(restore_registers) - ANNOTATE_NOENDBR + ENDBR /* go back to the original page tables */ movq %r9, %cr3 @@ -120,7 +120,7 @@ SYM_FUNC_END(restore_image) /* code below has been relocated to a safe page */ SYM_FUNC_START(core_restore_code) - ANNOTATE_NOENDBR + ENDBR /* switch to temporary page tables */ movq %rax, %cr3 /* flush TLB */ diff --git a/arch/x86/um/shared/sysdep/faultinfo_32.h b/arch/x86/um/shared/sysdep/faultinfo_32.h index ab5c8e47049c31..9193a7790a71a4 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_32.h +++ b/arch/x86/um/shared/sysdep/faultinfo_32.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movl $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ diff --git a/arch/x86/um/shared/sysdep/faultinfo_64.h b/arch/x86/um/shared/sysdep/faultinfo_64.h index 26fb4835d3e9a3..61e4ca1e0ab58d 100644 --- a/arch/x86/um/shared/sysdep/faultinfo_64.h +++ b/arch/x86/um/shared/sysdep/faultinfo_64.h @@ -31,8 +31,8 @@ struct faultinfo { #define ___backtrack_faulted(_faulted) \ asm volatile ( \ - "mov $0, %0\n" \ "movq $__get_kernel_nofault_faulted_%=,%1\n" \ + "mov $0, %0\n" \ "jmp _end_%=\n" \ "__get_kernel_nofault_faulted_%=:\n" \ "mov $1, %0;" \ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 43dcd8c7badc08..53282dc7d5ac5b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -70,6 +70,9 @@ EXPORT_SYMBOL(xen_start_flags); */ struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info; +/* Number of pages released from the initial allocation. 
*/ +unsigned long xen_released_pages; + static __ref void xen_get_vendor(void) { init_cpu_devs(); @@ -100,10 +103,6 @@ noinstr void *__xen_hypercall_setfunc(void) void (*func)(void); /* - * Xen is supported only on CPUs with CPUID, so testing for - * X86_FEATURE_CPUID is a test for early_cpu_init() having been - * run. - * * Note that __xen_hypercall_setfunc() is noinstr only due to a nasty * dependency chain: it is being called via the xen_hypercall static * call when running as a PVH or HVM guest. Hypercalls need to be @@ -115,8 +114,7 @@ noinstr void *__xen_hypercall_setfunc(void) */ instrumentation_begin(); - if (!boot_cpu_has(X86_FEATURE_CPUID)) - xen_get_vendor(); + xen_get_vendor(); if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) @@ -466,6 +464,13 @@ int __init arch_xen_unpopulated_init(struct resource **res) xen_free_unpopulated_pages(1, &pg); } + /* + * Account for the region being in the physmap but unpopulated. + * The value in xen_released_pages is used by the balloon + * driver to know how much of the physmap is unpopulated and + * set an accurate initial memory target. + */ + xen_released_pages += xen_extra_mem[i].n_pfns; /* Zero so region is not also added to the balloon driver. */ xen_extra_mem[i].n_pfns = 0; } diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 0e3d930bcb89e8..9d25d9373945cb 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include #include @@ -123,8 +125,23 @@ static void __init pvh_arch_setup(void) { pvh_reserve_extra_memory(); - if (xen_initial_domain()) + if (xen_initial_domain()) { xen_add_preferred_consoles(); + + /* + * Disable usage of CPU idle and frequency drivers: when + * running as hardware domain the exposed native ACPI tables + * causes idle and/or frequency drivers to attach and + * malfunction. 
It's Xen the entity that controls the idle and + * frequency states. + * + * For unprivileged domains the exposed ACPI tables are + * fabricated and don't contain such data. + */ + disable_cpuidle(); + disable_cpufreq(); + WARN_ON(xen_set_default_idle()); + } } void __init xen_pvh_init(struct boot_params *boot_params) diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 10c660fae8b300..7237d56a9d3f01 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,14 +54,20 @@ struct mc_debug_data { static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); static struct mc_debug_data mc_debug_data_early __initdata; -static DEFINE_PER_CPU(struct mc_debug_data *, mc_debug_data) = - &mc_debug_data_early; static struct mc_debug_data __percpu *mc_debug_data_ptr; DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); static struct static_key mc_debug __ro_after_init; static bool mc_debug_enabled __initdata; +static struct mc_debug_data * __ref get_mc_debug(void) +{ + if (!mc_debug_data_ptr) + return &mc_debug_data_early; + + return this_cpu_ptr(mc_debug_data_ptr); +} + static int __init xen_parse_mc_debug(char *arg) { mc_debug_enabled = true; @@ -71,20 +77,16 @@ static int __init xen_parse_mc_debug(char *arg) } early_param("xen_mc_debug", xen_parse_mc_debug); -void mc_percpu_init(unsigned int cpu) -{ - per_cpu(mc_debug_data, cpu) = per_cpu_ptr(mc_debug_data_ptr, cpu); -} - static int __init mc_debug_enable(void) { unsigned long flags; + struct mc_debug_data __percpu *mcdb; if (!mc_debug_enabled) return 0; - mc_debug_data_ptr = alloc_percpu(struct mc_debug_data); - if (!mc_debug_data_ptr) { + mcdb = alloc_percpu(struct mc_debug_data); + if (!mcdb) { pr_err("xen_mc_debug inactive\n"); static_key_slow_dec(&mc_debug); return -ENOMEM; @@ -93,7 +95,7 @@ static int __init mc_debug_enable(void) /* Be careful when switching to percpu debug data. 
*/ local_irq_save(flags); xen_mc_flush(); - mc_percpu_init(0); + mc_debug_data_ptr = mcdb; local_irq_restore(flags); pr_info("xen_mc_debug active\n"); @@ -155,7 +157,7 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); if (static_key_false(&mc_debug)) { - mcdb = __this_cpu_read(mc_debug_data); + mcdb = get_mc_debug(); memcpy(mcdb->entries, b->entries, b->mcidx * sizeof(struct multicall_entry)); } @@ -235,7 +237,7 @@ struct multicall_space __xen_mc_entry(size_t args) ret.mc = &b->entries[b->mcidx]; if (static_key_false(&mc_debug)) { - struct mc_debug_data *mcdb = __this_cpu_read(mc_debug_data); + struct mc_debug_data *mcdb = get_mc_debug(); mcdb->caller[b->mcidx] = __builtin_return_address(0); mcdb->argsz[b->mcidx] = args; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c3db71d96c434a..3823e52aef523c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -37,9 +37,6 @@ #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) -/* Number of pages released from the initial allocation. */ -unsigned long xen_released_pages; - /* Memory map would allow PCI passthrough. */ bool xen_pv_pci_possible; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 688ff59318ae22..9bb8ff8bff30a6 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -305,7 +305,6 @@ static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle) return rc; xen_pmu_init(cpu); - mc_percpu_init(cpu); /* * Why is this a BUG? If the hypercall fails then everything can be diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 109af12f764727..461bb1526502a0 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -226,9 +226,7 @@ SYM_CODE_END(xen_early_idt_handler_array) push %rax mov $__HYPERVISOR_iret, %eax syscall /* Do the IRET. */ -#ifdef CONFIG_MITIGATION_SLS - int3 -#endif + ud2 /* The SYSCALL should never return. 
*/ .endm SYM_CODE_START(xen_iret) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 63c13a2ccf556a..25e318ef27d6b0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -261,9 +261,6 @@ void xen_mc_callback(void (*fn)(void *), void *data); */ struct multicall_space xen_mc_extend_args(unsigned long op, size_t arg_size); -/* Do percpu data initialization for multicalls. */ -void mc_percpu_init(unsigned int cpu); - extern bool is_xen_pmu; irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id); diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 27f11320b8d12d..e98585dd83e0f4 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -16,6 +16,20 @@ config MQ_IOSCHED_KYBER synchronous writes, it will self-tune queue depths to achieve that goal. +config MQ_IOSCHED_ADIOS + tristate "Adaptive Deadline I/O scheduler" + default m + help + The Adaptive Deadline I/O Scheduler (ADIOS) is a multi-queue I/O + scheduler with learning-based adaptive latency control. + +config MQ_IOSCHED_DEFAULT_ADIOS + bool "Enable ADIOS I/O scheduler as default MQ I/O scheduler" + depends on MQ_IOSCHED_ADIOS=y + default n + help + Enable the ADIOS I/O scheduler as the default scheduler for MQ I/O. 
+ config IOSCHED_BFQ tristate "BFQ I/O scheduler" select BLK_ICQ diff --git a/block/Makefile b/block/Makefile index 3a941dc0d27fb2..94e3bf608bd2d1 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o +obj-$(CONFIG_MQ_IOSCHED_ADIOS) += adios.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o @@ -37,3 +38,10 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ blk-crypto-sysfs.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o + +all: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules + +clean: + make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean + diff --git a/block/adios.c b/block/adios.c new file mode 100644 index 00000000000000..6f6fb237df69dd --- /dev/null +++ b/block/adios.c @@ -0,0 +1,1382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Adaptive Deadline I/O Scheduler (ADIOS) + * Copyright (C) 2025 Masahito Suzuki + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elevator.h" +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-sched.h" + +#define ADIOS_VERSION "2.0.0" + +// Define operation types supported by ADIOS +enum adios_op_type { + ADIOS_READ = 0, + ADIOS_WRITE = 1, + ADIOS_DISCARD = 2, + ADIOS_OTHER = 3, + ADIOS_OPTYPES = 4, +}; + +// Global variable to control the latency +static u64 default_global_latency_window = 32000000ULL; +// Ratio below which batch queues should be refilled +static u8 default_bq_refill_below_ratio = 25; + +// Dynamic thresholds for shrinkage +static u32 default_lm_shrink_at_kreqs = 5000; +static u32 default_lm_shrink_at_gbytes = 50; +static u32 
default_lm_shrink_resist = 2; + +// Latency targets for each operation type +static u64 default_latency_target[ADIOS_OPTYPES] = { + [ADIOS_READ] = 1ULL * NSEC_PER_MSEC, + [ADIOS_WRITE] = 2000ULL * NSEC_PER_MSEC, + [ADIOS_DISCARD] = 8000ULL * NSEC_PER_MSEC, + [ADIOS_OTHER] = 0ULL * NSEC_PER_MSEC, +}; + +// Maximum batch size limits for each operation type +static u32 default_batch_limit[ADIOS_OPTYPES] = { + [ADIOS_READ] = 24, + [ADIOS_WRITE] = 48, + [ADIOS_DISCARD] = 1, + [ADIOS_OTHER] = 1, +}; + +static u32 default_dl_prio[2] = {7, 0}; + +// Thresholds for latency model control +#define LM_BLOCK_SIZE_THRESHOLD 4096 +#define LM_SAMPLES_THRESHOLD 1024 +#define LM_INTERVAL_THRESHOLD 1500 +#define LM_OUTLIER_PERCENTILE 99 +#define LM_LAT_BUCKET_COUNT 64 + +// Structure to hold latency bucket data for small requests +struct latency_bucket_small { + u64 sum_latency; + u32 count; +}; + +// Structure to hold latency bucket data for large requests +struct latency_bucket_large { + u64 sum_latency; + u64 sum_block_size; + u32 count; +}; + +// New structure to hold per-cpu buckets, improving data locality and code clarity. 
+struct per_cpu_lm_buckets { + struct latency_bucket_small small_bucket[LM_LAT_BUCKET_COUNT]; + struct latency_bucket_large large_bucket[LM_LAT_BUCKET_COUNT]; +}; + +// Structure to hold the latency model context data +struct latency_model { + seqlock_t lock; + u64 base; + u64 slope; + u64 small_sum_delay; + u64 small_count; + u64 large_sum_delay; + u64 large_sum_bsize; + u64 last_update_jiffies; + + // Per-CPU buckets to avoid lock contention on the completion path + struct per_cpu_lm_buckets __percpu *pcpu_buckets; + + u32 lm_shrink_at_kreqs; + u32 lm_shrink_at_gbytes; + u8 lm_shrink_resist; +}; + +#define ADIOS_BQ_PAGES 2 + +// Adios scheduler data +struct adios_data { + spinlock_t pq_lock; + struct list_head prio_queue; + + struct rb_root_cached dl_tree[2]; + spinlock_t lock; + u8 dl_queued; + s64 dl_bias; + s32 dl_prio[2]; + + u64 global_latency_window; + u64 latency_target[ADIOS_OPTYPES]; + u32 batch_limit[ADIOS_OPTYPES]; + u32 batch_actual_max_size[ADIOS_OPTYPES]; + u32 batch_actual_max_total; + u32 async_depth; + u8 bq_refill_below_ratio; + + u8 bq_page; + bool more_bq_ready; + struct list_head batch_queue[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + u32 batch_count[ADIOS_BQ_PAGES][ADIOS_OPTYPES]; + spinlock_t bq_lock; + + struct per_cpu_lm_buckets *aggr_buckets; + + struct latency_model latency_model[ADIOS_OPTYPES]; + struct timer_list update_timer; + + atomic64_t total_pred_lat; + + struct kmem_cache *rq_data_pool; + struct kmem_cache *dl_group_pool; + + struct request_queue *queue; +}; + +// List of requests with the same deadline in the deadline-sorted tree +struct dl_group { + struct rb_node node; + struct list_head rqs; + u64 deadline; +} __attribute__((aligned(64))); + +// Structure to hold scheduler-specific data for each request +struct adios_rq_data { + struct list_head *dl_group; + struct list_head dl_node; + + struct request *rq; + u64 deadline; + u64 pred_lat; + u32 block_size; +} __attribute__((aligned(64))); + +static const int adios_prio_to_weight[40] 
= { + /* -20 */ 88761, 71755, 56483, 46273, 36291, + /* -15 */ 29154, 23254, 18705, 14949, 11916, + /* -10 */ 9548, 7620, 6100, 4904, 3906, + /* -5 */ 3121, 2501, 1991, 1586, 1277, + /* 0 */ 1024, 820, 655, 526, 423, + /* 5 */ 335, 272, 215, 172, 137, + /* 10 */ 110, 87, 70, 56, 45, + /* 15 */ 36, 29, 23, 18, 15, +}; + +// Count the number of entries in aggregated small buckets +static u32 lm_count_small_entries(struct latency_bucket_small *buckets) { + u32 total_count = 0; + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) + total_count += buckets[i].count; + return total_count; +} + +// Update the small buckets in the latency model from aggregated data +static bool lm_update_small_buckets(struct latency_model *model, + struct latency_bucket_small *buckets, + u32 total_count, bool count_all) { + u64 sum_latency = 0; + u32 sum_count = 0; + u32 cumulative_count = 0, threshold_count = 0; + u8 outlier_threshold_bucket = 0; + u8 outlier_percentile = LM_OUTLIER_PERCENTILE; + u8 reduction; + + if (count_all) + outlier_percentile = 100; + + // Calculate the threshold count for outlier detection + threshold_count = (total_count * outlier_percentile) / 100; + + // Identify the bucket that corresponds to the outlier threshold + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + cumulative_count += buckets[i].count; + if (cumulative_count >= threshold_count) { + outlier_threshold_bucket = i; + break; + } + } + + // Calculate the average latency, excluding outliers + for (u8 i = 0; i <= outlier_threshold_bucket; i++) { + struct latency_bucket_small *bucket = &buckets[i]; + if (i < outlier_threshold_bucket) { + sum_latency += bucket->sum_latency; + sum_count += bucket->count; + } else { + // The threshold bucket's contribution is proportional + u64 remaining_count = + threshold_count - (cumulative_count - bucket->count); + if (bucket->count > 0) { + sum_latency += + div_u64((bucket->sum_latency * remaining_count), bucket->count); + sum_count += remaining_count; + } + } + } + + // 
Shrink the model if it reaches at the readjustment threshold + if (model->small_count >= 1000ULL * model->lm_shrink_at_kreqs) { + reduction = model->lm_shrink_resist; + if (model->small_count >> reduction) { + model->small_sum_delay -= model->small_sum_delay >> reduction; + model->small_count -= model->small_count >> reduction; + } + } + + // Accumulate the average latency into the statistics + model->small_sum_delay += sum_latency; + model->small_count += sum_count; + + return true; +} + +// Count the number of entries in aggregated large buckets +static u32 lm_count_large_entries(struct latency_bucket_large *buckets) { + u32 total_count = 0; + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) + total_count += buckets[i].count; + return total_count; +} + +// Update the large buckets in the latency model from aggregated data +static bool lm_update_large_buckets(struct latency_model *model, + struct latency_bucket_large *buckets, + u32 total_count, bool count_all) { + s64 sum_latency = 0; + u64 sum_block_size = 0, intercept; + u32 cumulative_count = 0, threshold_count = 0; + u8 outlier_threshold_bucket = 0; + u8 outlier_percentile = LM_OUTLIER_PERCENTILE; + u8 reduction; + + if (count_all) + outlier_percentile = 100; + + // Calculate the threshold count for outlier detection + threshold_count = (total_count * outlier_percentile) / 100; + + // Identify the bucket that corresponds to the outlier threshold + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + cumulative_count += buckets[i].count; + if (cumulative_count >= threshold_count) { + outlier_threshold_bucket = i; + break; + } + } + + // Calculate the average latency and block size, excluding outliers + for (u8 i = 0; i <= outlier_threshold_bucket; i++) { + struct latency_bucket_large *bucket = &buckets[i]; + if (i < outlier_threshold_bucket) { + sum_latency += bucket->sum_latency; + sum_block_size += bucket->sum_block_size; + } else { + // The threshold bucket's contribution is proportional + u64 remaining_count = + 
threshold_count - (cumulative_count - bucket->count); + if (bucket->count > 0) { + sum_latency += + div_u64((bucket->sum_latency * remaining_count), bucket->count); + sum_block_size += + div_u64((bucket->sum_block_size * remaining_count), bucket->count); + } + } + } + + // Shrink the model if it reaches at the readjustment threshold + if (model->large_sum_bsize >= 0x40000000ULL * model->lm_shrink_at_gbytes) { + reduction = model->lm_shrink_resist; + if (model->large_sum_bsize >> reduction) { + model->large_sum_delay -= model->large_sum_delay >> reduction; + model->large_sum_bsize -= model->large_sum_bsize >> reduction; + } + } + + // Accumulate the average delay into the statistics + intercept = model->base * threshold_count; + if (sum_latency > intercept) + sum_latency -= intercept; + + model->large_sum_delay += sum_latency; + model->large_sum_bsize += sum_block_size; + + return true; +} + +// Update the latency model parameters and statistics +static void latency_model_update( + struct adios_data *ad, struct latency_model *model) { + u64 now; + u32 small_count, large_count; + bool time_elapsed; + bool small_processed = false, large_processed = false; + struct per_cpu_lm_buckets *aggr = ad->aggr_buckets; + struct latency_bucket_small *asb; + struct latency_bucket_large *alb; + struct per_cpu_lm_buckets *pcpu_b; + unsigned long flags; + int cpu; + + memset(aggr, 0, sizeof(*aggr)); + + write_seqlock_irqsave(&model->lock, flags); + + // Aggregate data from all CPUs and reset per-cpu buckets. 
+ for_each_possible_cpu(cpu) { + pcpu_b = per_cpu_ptr(model->pcpu_buckets, cpu); + + for (u8 i = 0; i < LM_LAT_BUCKET_COUNT; i++) { + if (pcpu_b->small_bucket[i].count) { + asb = &aggr->small_bucket[i]; + asb->count += pcpu_b->small_bucket[i].count; + asb->sum_latency += pcpu_b->small_bucket[i].sum_latency; + } + if (pcpu_b->large_bucket[i].count) { + alb = &aggr->large_bucket[i]; + alb->count += pcpu_b->large_bucket[i].count; + alb->sum_latency += pcpu_b->large_bucket[i].sum_latency; + alb->sum_block_size += pcpu_b->large_bucket[i].sum_block_size; + } + } + // Reset per-cpu buckets after aggregating + memset(pcpu_b, 0, sizeof(*pcpu_b)); + } + + // Whether enough time has elapsed since the last update + now = jiffies; + time_elapsed = unlikely(!model->base) || model->last_update_jiffies + + msecs_to_jiffies(LM_INTERVAL_THRESHOLD) <= now; + + // Count the number of entries in aggregated buckets + small_count = lm_count_small_entries(aggr->small_bucket); + large_count = lm_count_large_entries(aggr->large_bucket); + + // Update small buckets + if (small_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= small_count || !model->base)) + small_processed = lm_update_small_buckets( + model, aggr->small_bucket, small_count, !model->base); + // Update large buckets + if (large_count && (time_elapsed || + LM_SAMPLES_THRESHOLD <= large_count || !model->slope)) + large_processed = lm_update_large_buckets( + model, aggr->large_bucket, large_count, !model->slope); + + // Update the base parameter if small bucket was processed + if (small_processed && likely(model->small_count)) + model->base = div_u64(model->small_sum_delay, model->small_count); + + // Update the slope parameter if large bucket was processed + if (large_processed && likely(model->large_sum_bsize)) + model->slope = div_u64(model->large_sum_delay, + DIV_ROUND_UP_ULL(model->large_sum_bsize, 1024)); + + // Reset statistics and update last updated jiffies if time has elapsed + if (time_elapsed) + 
model->last_update_jiffies = now; + + write_sequnlock_irqrestore(&model->lock, flags); +} + +// Determine the bucket index for a given measured and predicted latency +static u8 lm_input_bucket_index(u64 measured, u64 predicted) { + u8 bucket_index; + + if (measured < predicted * 2) + bucket_index = div_u64((measured * 20), predicted); + else if (measured < predicted * 5) + bucket_index = div_u64((measured * 10), predicted) + 20; + else + bucket_index = div_u64((measured * 3), predicted) + 40; + + return bucket_index; +} + +// Input latency data into the latency model +static void latency_model_input(struct adios_data *ad, + struct latency_model *model, u32 block_size, u64 latency, u64 pred_lat) { + u8 bucket_index; + struct per_cpu_lm_buckets *buckets; + + buckets = this_cpu_ptr(model->pcpu_buckets); + + if (block_size <= LM_BLOCK_SIZE_THRESHOLD) { + // Handle small requests + bucket_index = lm_input_bucket_index(latency, model->base ?: 1); + + if (bucket_index >= LM_LAT_BUCKET_COUNT) + bucket_index = LM_LAT_BUCKET_COUNT - 1; + + buckets->small_bucket[bucket_index].count++; + buckets->small_bucket[bucket_index].sum_latency += latency; + + if (unlikely(!model->base)) { + latency_model_update(ad, model); + return; + } + } else { + // Handle large requests + if (!model->base || !pred_lat) + return; + + bucket_index = lm_input_bucket_index(latency, pred_lat); + + if (bucket_index >= LM_LAT_BUCKET_COUNT) + bucket_index = LM_LAT_BUCKET_COUNT - 1; + + buckets->large_bucket[bucket_index].count++; + buckets->large_bucket[bucket_index].sum_latency += latency; + buckets->large_bucket[bucket_index].sum_block_size += block_size; + } +} + +// Predict the latency for a given block size using the latency model +static u64 latency_model_predict(struct latency_model *model, u32 block_size) { + u64 result, base, slope; + unsigned int seq; + + do { + seq = read_seqbegin(&model->lock); + base = model->base; + slope = model->slope; + } while (read_seqretry(&model->lock, seq)); + + 
result = base; + if (block_size > LM_BLOCK_SIZE_THRESHOLD) + result += slope * + DIV_ROUND_UP_ULL(block_size - LM_BLOCK_SIZE_THRESHOLD, 1024); + + return result; +} + +// Determine the type of operation based on request flags +static u8 adios_optype(struct request *rq) { + switch (rq->cmd_flags & REQ_OP_MASK) { + case REQ_OP_READ: + return ADIOS_READ; + case REQ_OP_WRITE: + return ADIOS_WRITE; + case REQ_OP_DISCARD: + return ADIOS_DISCARD; + default: + return ADIOS_OTHER; + } +} + +static inline u8 adios_optype_not_read(struct request *rq) { + return (rq->cmd_flags & REQ_OP_MASK) != REQ_OP_READ; +} + +// Helper function to retrieve adios_rq_data from a request +static inline struct adios_rq_data *get_rq_data(struct request *rq) { + return rq->elv.priv[0]; +} + +// Add a request to the deadline-sorted red-black tree +static void add_to_dl_tree( + struct adios_data *ad, bool dl_idx, struct request *rq) { + struct rb_root_cached *root = &ad->dl_tree[dl_idx]; + struct rb_node **link = &(root->rb_root.rb_node), *parent = NULL; + bool leftmost = true; + struct adios_rq_data *rd = get_rq_data(rq); + struct dl_group *dlg; + + rd->block_size = blk_rq_bytes(rq); + u8 optype = adios_optype(rq); + rd->pred_lat = + latency_model_predict(&ad->latency_model[optype], rd->block_size); + rd->deadline = + rq->start_time_ns + ad->latency_target[optype] + rd->pred_lat; + + while (*link) { + dlg = rb_entry(*link, struct dl_group, node); + s64 diff = rd->deadline - dlg->deadline; + + parent = *link; + if (diff < 0) { + link = &((*link)->rb_left); + } else if (diff > 0) { + link = &((*link)->rb_right); + leftmost = false; + } else { // diff == 0 + goto found; + } + } + + dlg = rb_entry_safe(parent, struct dl_group, node); + if (!dlg || dlg->deadline != rd->deadline) { + dlg = kmem_cache_zalloc(ad->dl_group_pool, GFP_ATOMIC); + if (!dlg) + return; + dlg->deadline = rd->deadline; + INIT_LIST_HEAD(&dlg->rqs); + rb_link_node(&dlg->node, parent, link); + rb_insert_color_cached(&dlg->node, 
root, leftmost); + } +found: + list_add_tail(&rd->dl_node, &dlg->rqs); + rd->dl_group = &dlg->rqs; + ad->dl_queued |= 1 << dl_idx; +} + +// Remove a request from the deadline-sorted red-black tree +static void del_from_dl_tree( + struct adios_data *ad, bool dl_idx, struct request *rq) { + struct rb_root_cached *root = &ad->dl_tree[dl_idx]; + struct adios_rq_data *rd = get_rq_data(rq); + struct dl_group *dlg = container_of(rd->dl_group, struct dl_group, rqs); + + list_del_init(&rd->dl_node); + if (list_empty(&dlg->rqs)) { + rb_erase_cached(&dlg->node, root); + kmem_cache_free(ad->dl_group_pool, dlg); + } + rd->dl_group = NULL; + + if (RB_EMPTY_ROOT(&ad->dl_tree[dl_idx].rb_root)) + ad->dl_queued &= ~(1 << dl_idx); +} + +// Remove a request from the scheduler +static void remove_request(struct adios_data *ad, struct request *rq) { + bool dl_idx = adios_optype_not_read(rq); + struct request_queue *q = rq->q; + struct adios_rq_data *rd = get_rq_data(rq); + + list_del_init(&rq->queuelist); + + // We might not be on the rbtree, if we are doing an insert merge + if (rd->dl_group) + del_from_dl_tree(ad, dl_idx, rq); + + elv_rqhash_del(q, rq); + if (q->last_merge == rq) + q->last_merge = NULL; +} + +// Convert a queue depth to the corresponding word depth for shallow allocation +static int to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) { + struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; + const unsigned int nrr = hctx->queue->nr_requests; + + return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; +} + +// Limit the depth of request allocation for asynchronous and write requests +static void adios_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { + struct adios_data *ad = data->q->elevator->elevator_data; + + // Do not throttle synchronous reads + if (op_is_sync(opf) && !op_is_write(opf)) + return; + + data->shallow_depth = to_word_depth(data->hctx, ad->async_depth); +} + +// Update async_depth when the number of requests in the queue changes 
+static void adios_depth_updated(struct blk_mq_hw_ctx *hctx) { + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + + ad->async_depth = q->nr_requests; + + sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); +} + +// Handle request merging after a merge operation +static void adios_request_merged(struct request_queue *q, struct request *req, + enum elv_merge type) { + bool dl_idx = adios_optype_not_read(req); + struct adios_data *ad = q->elevator->elevator_data; + + // Reposition request in the deadline-sorted tree + del_from_dl_tree(ad, dl_idx, req); + add_to_dl_tree(ad, dl_idx, req); +} + +// Handle merging of requests after one has been merged into another +static void adios_merged_requests(struct request_queue *q, struct request *req, + struct request *next) { + struct adios_data *ad = q->elevator->elevator_data; + + lockdep_assert_held(&ad->lock); + + // kill knowledge of next, this one is a goner + remove_request(ad, next); +} + +// Try to merge a bio into an existing rq before associating it with an rq +static bool adios_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) { + struct adios_data *ad = q->elevator->elevator_data; + struct request *free = NULL; + bool ret; + + scoped_guard(spinlock_irqsave, &ad->lock) + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + + if (free) + blk_mq_free_request(free); + + return ret; +} + +// Insert a request into the scheduler +static void insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + blk_insert_t insert_flags, struct list_head *free) { + bool dl_idx = adios_optype_not_read(rq); + struct request_queue *q = hctx->queue; + struct adios_data *ad = q->elevator->elevator_data; + + if (insert_flags & BLK_MQ_INSERT_AT_HEAD) { + scoped_guard(spinlock_irqsave, &ad->pq_lock) + list_add_tail(&rq->queuelist, &ad->prio_queue); + return; + } + + guard(spinlock_irqsave)(&ad->lock); + + if 
(blk_mq_sched_try_insert_merge(q, rq, free)) + return; + + add_to_dl_tree(ad, dl_idx, rq); + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } +} + +// Insert multiple requests into the scheduler +static void adios_insert_requests(struct blk_mq_hw_ctx *hctx, + struct list_head *list, + blk_insert_t insert_flags) { + struct request *rq; + LIST_HEAD(free); + + while (!list_empty(list)) { + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + insert_request(hctx, rq, insert_flags, &free); + } + + blk_mq_free_requests(&free); +} + +// Prepare a request before it is inserted into the scheduler +static void adios_prepare_request(struct request *rq) { + struct adios_data *ad = rq->q->elevator->elevator_data; + struct adios_rq_data *rd; + + rq->elv.priv[0] = NULL; + + /* Allocate adios_rq_data from the memory pool */ + rd = kmem_cache_zalloc(ad->rq_data_pool, GFP_ATOMIC); + if (WARN(!rd, "adios_prepare_request: " + "Failed to allocate memory from rq_data_pool. 
rd is NULL\n")) + return; + + rd->rq = rq; + rq->elv.priv[0] = rd; +} + +static struct adios_rq_data *get_dl_first_rd(struct adios_data *ad, bool idx) { + struct rb_root_cached *root = &ad->dl_tree[idx]; + struct rb_node *first = rb_first_cached(root); + struct dl_group *dl_group = rb_entry(first, struct dl_group, node); + + return list_first_entry(&dl_group->rqs, struct adios_rq_data, dl_node); +} + +// Select the next request to dispatch from the deadline-sorted red-black tree +static struct request *next_request(struct adios_data *ad) { + struct adios_rq_data *rd; + bool dl_idx, bias_idx, reduce_bias; + + if (!ad->dl_queued) + return NULL; + + dl_idx = ad->dl_queued >> 1; + rd = get_dl_first_rd(ad, dl_idx); + + bias_idx = ad->dl_bias < 0; + reduce_bias = (bias_idx == dl_idx); + + if (ad->dl_queued == 0x3) { + struct adios_rq_data *trd[2]; + trd[0] = get_dl_first_rd(ad, 0); + trd[1] = rd; + + rd = trd[bias_idx]; + + reduce_bias = + (trd[bias_idx]->deadline > trd[((u8)bias_idx + 1) % 2]->deadline); + } + + if (reduce_bias) { + s64 sign = ((int)bias_idx << 1) - 1; + if (unlikely(!rd->pred_lat)) + ad->dl_bias = sign; + else { + ad->dl_bias += sign * (s64)((rd->pred_lat * + adios_prio_to_weight[ad->dl_prio[bias_idx] + 20]) >> 10); + } + } + + return rd->rq; +} + +// Reset the batch queue counts for a given page +static void reset_batch_counts(struct adios_data *ad, u8 page) { + memset(&ad->batch_count[page], 0, sizeof(ad->batch_count[page])); +} + +// Initialize all batch queues +static void init_batch_queues(struct adios_data *ad) { + for (u8 page = 0; page < ADIOS_BQ_PAGES; page++) { + reset_batch_counts(ad, page); + + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) + INIT_LIST_HEAD(&ad->batch_queue[page][optype]); + } +} + +// Fill the batch queues with requests from the deadline-sorted red-black tree +static bool fill_batch_queues(struct adios_data *ad, u64 current_lat) { + u32 count = 0; + u32 optype_count[ADIOS_OPTYPES] = {0}; + u8 page = (ad->bq_page + 
1) % ADIOS_BQ_PAGES; + + reset_batch_counts(ad, page); + + scoped_guard(spinlock_irqsave, &ad->lock) + while (true) { + struct request *rq = next_request(ad); + if (!rq) + break; + + struct adios_rq_data *rd = get_rq_data(rq); + u8 optype = adios_optype(rq); + current_lat += rd->pred_lat; + + // Check batch size and total predicted latency + if (count && (!ad->latency_model[optype].base || + ad->batch_count[page][optype] >= ad->batch_limit[optype] || + current_lat > ad->global_latency_window)) { + break; + } + + remove_request(ad, rq); + + // Add request to the corresponding batch queue + list_add_tail(&rq->queuelist, &ad->batch_queue[page][optype]); + ad->batch_count[page][optype]++; + atomic64_add(rd->pred_lat, &ad->total_pred_lat); + optype_count[optype]++; + count++; + } + + if (count) { + ad->more_bq_ready = true; + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) { + if (ad->batch_actual_max_size[optype] < optype_count[optype]) + ad->batch_actual_max_size[optype] = optype_count[optype]; + } + if (ad->batch_actual_max_total < count) + ad->batch_actual_max_total = count; + } + return count; +} + +// Flip to the next batch queue page +static void flip_bq_page(struct adios_data *ad) { + ad->more_bq_ready = false; + ad->bq_page = (ad->bq_page + 1) % ADIOS_BQ_PAGES; +} + +// Dispatch a request from the batch queues +static struct request *dispatch_from_bq(struct adios_data *ad) { + struct request *rq = NULL; + u64 tpl; + + guard(spinlock_irqsave)(&ad->bq_lock); + + tpl = atomic64_read(&ad->total_pred_lat); + + if (!ad->more_bq_ready && (!tpl || + tpl < div_u64(ad->global_latency_window * ad->bq_refill_below_ratio, 100))) + fill_batch_queues(ad, tpl); + +again: + // Check if there are any requests in the batch queues + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + if (!list_empty(&ad->batch_queue[ad->bq_page][i])) { + rq = list_first_entry(&ad->batch_queue[ad->bq_page][i], + struct request, queuelist); + list_del_init(&rq->queuelist); + return rq; + } + } + + // 
If there's more batch queue page available, flip to it and retry + if (ad->more_bq_ready) { + flip_bq_page(ad); + goto again; + } + + return NULL; +} + +// Dispatch a request from the priority queue +static struct request *dispatch_from_pq(struct adios_data *ad) { + struct request *rq = NULL; + + guard(spinlock_irqsave)(&ad->pq_lock); + + if (!list_empty(&ad->prio_queue)) { + rq = list_first_entry(&ad->prio_queue, struct request, queuelist); + list_del_init(&rq->queuelist); + } + return rq; +} + +// Dispatch a request to the hardware queue +static struct request *adios_dispatch_request(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + struct request *rq; + + rq = dispatch_from_pq(ad); + if (rq) goto found; + rq = dispatch_from_bq(ad); + if (!rq) return NULL; +found: + rq->rq_flags |= RQF_STARTED; + return rq; +} + +// Timer callback function to periodically update latency models +static void update_timer_callback(struct timer_list *t) { + struct adios_data *ad = from_timer(ad, t, update_timer); + + for (u8 optype = 0; optype < ADIOS_OPTYPES; optype++) + latency_model_update(ad, &ad->latency_model[optype]); +} + +// Handle the completion of a request +static void adios_completed_request(struct request *rq, u64 now) { + struct adios_data *ad = rq->q->elevator->elevator_data; + struct adios_rq_data *rd = get_rq_data(rq); + + atomic64_sub(rd->pred_lat, &ad->total_pred_lat); + + if (!rq->io_start_time_ns || !rd->block_size) + return; + u64 latency = now - rq->io_start_time_ns; + u8 optype = adios_optype(rq); + latency_model_input(ad, &ad->latency_model[optype], + rd->block_size, latency, rd->pred_lat); + timer_reduce(&ad->update_timer, jiffies + msecs_to_jiffies(100)); +} + +// Clean up after a request is finished +static void adios_finish_request(struct request *rq) { + struct adios_data *ad = rq->q->elevator->elevator_data; + + if (rq->elv.priv[0]) { + // Free adios_rq_data back to the memory pool + 
kmem_cache_free(ad->rq_data_pool, get_rq_data(rq)); + rq->elv.priv[0] = NULL; + } +} + +static inline bool pq_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->pq_lock); + return !list_empty(&ad->prio_queue); +} + +static inline bool bq_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->bq_lock); + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) + if (!list_empty(&ad->batch_queue[ad->bq_page][i])) + return true; + + return ad->more_bq_ready; +} + +static inline bool dl_tree_has_work(struct adios_data *ad) { + guard(spinlock_irqsave)(&ad->lock); + return ad->dl_queued; +} + +// Check if there are any requests available for dispatch +static bool adios_has_work(struct blk_mq_hw_ctx *hctx) { + struct adios_data *ad = hctx->queue->elevator->elevator_data; + + return pq_has_work(ad) || bq_has_work(ad) || dl_tree_has_work(ad); +} + +// Initialize the scheduler-specific data for a hardware queue +static int adios_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) { + adios_depth_updated(hctx); + return 0; +} + +// Initialize the scheduler-specific data when initializing the request queue +static int adios_init_sched(struct request_queue *q, struct elevator_type *e) { + struct adios_data *ad; + struct elevator_queue *eq; + int ret = -ENOMEM; + int cpu = 0; + + eq = elevator_alloc(q, e); + if (!eq) + return ret; + + ad = kzalloc_node(sizeof(*ad), GFP_KERNEL, q->node); + if (!ad) + goto put_eq; + + // Create a memory pool for adios_rq_data + ad->rq_data_pool = kmem_cache_create("rq_data_pool", + sizeof(struct adios_rq_data), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->rq_data_pool) { + pr_err("adios: Failed to create rq_data_pool\n"); + goto free_ad; + } + + /* Create a memory pool for dl_group */ + ad->dl_group_pool = kmem_cache_create("dl_group_pool", + sizeof(struct dl_group), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ad->dl_group_pool) { + pr_err("adios: Failed to create dl_group_pool\n"); + goto destroy_rq_data_pool; + } + + eq->elevator_data 
= ad; + + ad->global_latency_window = default_global_latency_window; + ad->bq_refill_below_ratio = default_bq_refill_below_ratio; + + INIT_LIST_HEAD(&ad->prio_queue); + for (u8 i = 0; i < 2; i++) + ad->dl_tree[i] = RB_ROOT_CACHED; + ad->dl_bias = 0; + ad->dl_queued = 0x0; + for (u8 i = 0; i < 2; i++) + ad->dl_prio[i] = default_dl_prio[i]; + + ad->aggr_buckets = kzalloc(sizeof(*ad->aggr_buckets), GFP_KERNEL); + if (!ad->aggr_buckets) { + pr_err("adios: Failed to allocate aggregation buckets\n"); + goto destroy_dl_group_pool; + } + + for (cpu = 0; cpu < ADIOS_OPTYPES; cpu++) { + struct latency_model *model = &ad->latency_model[cpu]; + seqlock_init(&model->lock); + + model->pcpu_buckets = alloc_percpu(struct per_cpu_lm_buckets); + if (!model->pcpu_buckets) + goto free_buckets; + + model->last_update_jiffies = jiffies; + model->lm_shrink_at_kreqs = default_lm_shrink_at_kreqs; + model->lm_shrink_at_gbytes = default_lm_shrink_at_gbytes; + model->lm_shrink_resist = default_lm_shrink_resist; + + ad->latency_target[cpu] = default_latency_target[cpu]; + ad->batch_limit[cpu] = default_batch_limit[cpu]; + } + timer_setup(&ad->update_timer, update_timer_callback, 0); + init_batch_queues(ad); + + spin_lock_init(&ad->lock); + spin_lock_init(&ad->pq_lock); + spin_lock_init(&ad->bq_lock); + + /* We dispatch from request queue wide instead of hw queue */ + blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q); + + ad->queue = q; + blk_stat_enable_accounting(q); + + q->elevator = eq; + return 0; + +free_buckets: + pr_err("adios: Failed to allocate per-cpu buckets\n"); + while (--cpu >= 0) { + struct latency_model *prev_model = &ad->latency_model[cpu]; + free_percpu(prev_model->pcpu_buckets); + } + kfree(ad->aggr_buckets); +destroy_dl_group_pool: + kmem_cache_destroy(ad->dl_group_pool); +destroy_rq_data_pool: + kmem_cache_destroy(ad->rq_data_pool); +free_ad: + kfree(ad); +put_eq: + kobject_put(&eq->kobj); + return ret; +} + +// Clean up and free resources when exiting the scheduler +static void 
adios_exit_sched(struct elevator_queue *e) { + struct adios_data *ad = e->elevator_data; + + timer_shutdown_sync(&ad->update_timer); + + WARN_ON_ONCE(!list_empty(&ad->prio_queue)); + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + free_percpu(model->pcpu_buckets); + } + kfree(ad->aggr_buckets); + + if (ad->rq_data_pool) + kmem_cache_destroy(ad->rq_data_pool); + + if (ad->dl_group_pool) + kmem_cache_destroy(ad->dl_group_pool); + + blk_stat_disable_accounting(ad->queue); + + kfree(ad); +} + +// Define sysfs attributes for read operation latency model +#define SYSFS_OPTYPE_DECL(name, optype) \ +static ssize_t adios_lat_model_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + struct latency_model *model = &ad->latency_model[optype]; \ + ssize_t len = 0; \ + u64 base, slope; \ + unsigned int seq; \ + do { \ + seq = read_seqbegin(&model->lock); \ + base = model->base; \ + slope = model->slope; \ + } while (read_seqretry(&model->lock, seq)); \ + len += sprintf(page, "base : %llu ns\n", base); \ + len += sprintf(page + len, "slope: %llu ns/KiB\n", slope);\ + return len; \ +} \ +static ssize_t adios_lat_target_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long nsec; \ + int ret; \ + ret = kstrtoul(page, 10, &nsec); \ + if (ret) \ + return ret; \ + ad->latency_model[optype].base = 0ULL; \ + ad->latency_target[optype] = nsec; \ + return count; \ +} \ +static ssize_t adios_lat_target_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%llu\n", ad->latency_target[optype]); \ +} \ +static ssize_t adios_batch_limit_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + unsigned long max_batch; \ + int ret; \ + ret = kstrtoul(page, 10, &max_batch); \ + if (ret || max_batch == 
0) \ + return -EINVAL; \ + struct adios_data *ad = e->elevator_data; \ + ad->batch_limit[optype] = max_batch; \ + return count; \ +} \ +static ssize_t adios_batch_limit_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + return sprintf(page, "%u\n", ad->batch_limit[optype]); \ +} + +SYSFS_OPTYPE_DECL(read, ADIOS_READ); +SYSFS_OPTYPE_DECL(write, ADIOS_WRITE); +SYSFS_OPTYPE_DECL(discard, ADIOS_DISCARD); + +// Show the maximum batch size actually achieved for each operation type +static ssize_t adios_batch_actual_max_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + u32 total_count, read_count, write_count, discard_count; + + total_count = ad->batch_actual_max_total; + read_count = ad->batch_actual_max_size[ADIOS_READ]; + write_count = ad->batch_actual_max_size[ADIOS_WRITE]; + discard_count = ad->batch_actual_max_size[ADIOS_DISCARD]; + + return sprintf(page, + "Total : %u\nDiscard: %u\nRead : %u\nWrite : %u\n", + total_count, discard_count, read_count, write_count); +} + +// Set the global latency window +static ssize_t adios_global_latency_window_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long nsec; + int ret; + + ret = kstrtoul(page, 10, &nsec); + if (ret) + return ret; + + ad->global_latency_window = nsec; + + return count; +} + +// Show the global latency window +static ssize_t adios_global_latency_window_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%llu\n", ad->global_latency_window); +} + +// Show the bq_refill_below_ratio +static ssize_t adios_bq_refill_below_ratio_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%d\n", ad->bq_refill_below_ratio); +} + +// Set the bq_refill_below_ratio +static ssize_t adios_bq_refill_below_ratio_store( + struct 
elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + int ratio; + int ret; + + ret = kstrtoint(page, 10, &ratio); + if (ret || ratio < 0 || ratio > 100) + return -EINVAL; + + ad->bq_refill_below_ratio = ratio; + + return count; +} + +// Show the read priority +static ssize_t adios_read_priority_show( + struct elevator_queue *e, char *page) { + struct adios_data *ad = e->elevator_data; + return sprintf(page, "%d\n", ad->dl_prio[0]); +} + +// Set the read priority +static ssize_t adios_read_priority_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + int prio; + int ret; + + ret = kstrtoint(page, 10, &prio); + if (ret || prio < -20 || prio > 19) + return -EINVAL; + + guard(spinlock_irqsave)(&ad->lock); + ad->dl_prio[0] = prio; + ad->dl_bias = 0; + + return count; +} + +// Reset batch queue statistics +static ssize_t adios_reset_bq_stats_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long val; + int ret; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) + ad->batch_actual_max_size[i] = 0; + + ad->batch_actual_max_total = 0; + + return count; +} + +// Reset the latency model parameters +static ssize_t adios_reset_lat_model_store( + struct elevator_queue *e, const char *page, size_t count) { + struct adios_data *ad = e->elevator_data; + unsigned long val; + int ret; + + ret = kstrtoul(page, 10, &val); + if (ret || val != 1) + return -EINVAL; + + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { + struct latency_model *model = &ad->latency_model[i]; + write_seqlock_bh(&model->lock); + model->base = 0ULL; + model->slope = 0ULL; + model->small_sum_delay = 0ULL; + model->small_count = 0ULL; + model->large_sum_delay = 0ULL; + model->large_sum_bsize = 0ULL; + write_sequnlock_bh(&model->lock); + } + + return count; +} + +// Show the 
ADIOS version +static ssize_t adios_version_show(struct elevator_queue *e, char *page) { + return sprintf(page, "%s\n", ADIOS_VERSION); +} + +// Define sysfs attributes for dynamic thresholds +#define SHRINK_THRESHOLD_ATTR_RW(name, model_field, min_value, max_value) \ +static ssize_t adios_shrink_##name##_store( \ + struct elevator_queue *e, const char *page, size_t count) { \ + struct adios_data *ad = e->elevator_data; \ + unsigned long val; \ + int ret; \ + ret = kstrtoul(page, 10, &val); \ + if (ret || val < min_value || val > max_value) \ + return -EINVAL; \ + for (u8 i = 0; i < ADIOS_OPTYPES; i++) { \ + struct latency_model *model = &ad->latency_model[i]; \ + write_seqlock_bh(&model->lock); \ + model->model_field = val; \ + write_sequnlock_bh(&model->lock); \ + } \ + return count; \ +} \ +static ssize_t adios_shrink_##name##_show( \ + struct elevator_queue *e, char *page) { \ + struct adios_data *ad = e->elevator_data; \ + u32 val = 0; \ + unsigned int seq; \ + struct latency_model *model = &ad->latency_model[0]; \ + do { \ + seq = read_seqbegin(&model->lock); \ + val = model->model_field; \ + } while (read_seqretry(&model->lock, seq)); \ + return sprintf(page, "%u\n", val); \ +} + +SHRINK_THRESHOLD_ATTR_RW(at_kreqs, lm_shrink_at_kreqs, 1, 100000) +SHRINK_THRESHOLD_ATTR_RW(at_gbytes, lm_shrink_at_gbytes, 1, 1000) +SHRINK_THRESHOLD_ATTR_RW(resist, lm_shrink_resist, 1, 3) + +// Define sysfs attributes +#define AD_ATTR(name, show_func, store_func) \ + __ATTR(name, 0644, show_func, store_func) +#define AD_ATTR_RW(name) \ + __ATTR(name, 0644, adios_##name##_show, adios_##name##_store) +#define AD_ATTR_RO(name) \ + __ATTR(name, 0444, adios_##name##_show, NULL) +#define AD_ATTR_WO(name) \ + __ATTR(name, 0200, NULL, adios_##name##_store) + +// Define sysfs attributes for ADIOS scheduler +static struct elv_fs_entry adios_sched_attrs[] = { + AD_ATTR_RO(batch_actual_max), + AD_ATTR_RW(bq_refill_below_ratio), + AD_ATTR_RW(global_latency_window), + + 
AD_ATTR_RW(batch_limit_read), + AD_ATTR_RW(batch_limit_write), + AD_ATTR_RW(batch_limit_discard), + + AD_ATTR_RO(lat_model_read), + AD_ATTR_RO(lat_model_write), + AD_ATTR_RO(lat_model_discard), + + AD_ATTR_RW(lat_target_read), + AD_ATTR_RW(lat_target_write), + AD_ATTR_RW(lat_target_discard), + + AD_ATTR_RW(shrink_at_kreqs), + AD_ATTR_RW(shrink_at_gbytes), + AD_ATTR_RW(shrink_resist), + + AD_ATTR_RW(read_priority), + + AD_ATTR_WO(reset_bq_stats), + AD_ATTR_WO(reset_lat_model), + AD_ATTR(adios_version, adios_version_show, NULL), + + __ATTR_NULL +}; + +// Define the ADIOS scheduler type +static struct elevator_type mq_adios = { + .ops = { + .next_request = elv_rb_latter_request, + .former_request = elv_rb_former_request, + .limit_depth = adios_limit_depth, + .depth_updated = adios_depth_updated, + .request_merged = adios_request_merged, + .requests_merged = adios_merged_requests, + .bio_merge = adios_bio_merge, + .insert_requests = adios_insert_requests, + .prepare_request = adios_prepare_request, + .dispatch_request = adios_dispatch_request, + .completed_request = adios_completed_request, + .finish_request = adios_finish_request, + .has_work = adios_has_work, + .init_hctx = adios_init_hctx, + .init_sched = adios_init_sched, + .exit_sched = adios_exit_sched, + }, + .elevator_attrs = adios_sched_attrs, + .elevator_name = "adios", + .elevator_owner = THIS_MODULE, +}; +MODULE_ALIAS("mq-adios-iosched"); + +#define ADIOS_PROGNAME "Adaptive Deadline I/O Scheduler" +#define ADIOS_AUTHOR "Masahito Suzuki" + +// Initialize the ADIOS scheduler module +static int __init adios_init(void) { + printk(KERN_INFO "%s %s by %s\n", + ADIOS_PROGNAME, ADIOS_VERSION, ADIOS_AUTHOR); + return elv_register(&mq_adios); +} + +// Exit the ADIOS scheduler module +static void __exit adios_exit(void) { + elv_unregister(&mq_adios); +} + +module_init(adios_init); +module_exit(adios_exit); + +MODULE_AUTHOR(ADIOS_AUTHOR); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(ADIOS_PROGNAME); \ No newline at end 
of file diff --git a/block/bdev.c b/block/bdev.c index 4844d1e27b6fbc..889ec6e002d7ed 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev) get_order(bsize)); } -int set_blocksize(struct file *file, int size) +/** + * bdev_validate_blocksize - check that this block size is acceptable + * @bdev: blockdevice to check + * @block_size: block size to check + * + * For block device users that do not use buffer heads or the block device + * page cache, make sure that this block size can be used with the device. + * + * Return: On success zero is returned, negative error code on failure. + */ +int bdev_validate_blocksize(struct block_device *bdev, int block_size) { - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = I_BDEV(inode); - - if (blk_validate_block_size(size)) + if (blk_validate_block_size(block_size)) return -EINVAL; /* Size cannot be smaller than the size supported by the device */ - if (size < bdev_logical_block_size(bdev)) + if (block_size < bdev_logical_block_size(bdev)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(bdev_validate_blocksize); + +int set_blocksize(struct file *file, int size) +{ + struct inode *inode = file->f_mapping->host; + struct block_device *bdev = I_BDEV(inode); + int ret; + + ret = bdev_validate_blocksize(bdev, size); + if (ret) + return ret; + if (!file->private_data) return -EINVAL; /* Don't change the size if it is same as current */ if (inode->i_blkbits != blksize_bits(size)) { + /* + * Flush and truncate the pagecache before we reconfigure the + * mapping geometry because folio sizes are variable now. If a + * reader has already allocated a folio whose size is smaller + * than the new min_order but invokes readahead after the new + * min_order becomes visible, readahead will think there are + * "zero" blocks per folio and crash. Take the inode and + * invalidation locks to avoid racing with + * read/write/fallocate. 
+ */ + inode_lock(inode); + filemap_invalidate_lock(inode->i_mapping); + sync_blockdev(bdev); + kill_bdev(bdev); + inode->i_blkbits = blksize_bits(size); mapping_set_folio_min_order(inode->i_mapping, get_order(size)); kill_bdev(bdev); + filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); } return 0; } @@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part) blkdev_put_whole(whole); } -struct block_device *blkdev_get_no_open(dev_t dev) +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload) { struct block_device *bdev; struct inode *inode; inode = ilookup(blockdev_superblock, dev); - if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { + if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) { blk_request_module(dev); inode = ilookup(blockdev_superblock, dev); if (inode) @@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, if (ret) return ERR_PTR(ret); - bdev = blkdev_get_no_open(dev); + bdev = blkdev_get_no_open(dev, true); if (!bdev) return ERR_PTR(-ENXIO); @@ -1272,21 +1310,17 @@ void sync_bdevs(bool wait) /* * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices. */ -void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask) { - struct inode *backing_inode; struct block_device *bdev; - backing_inode = d_backing_inode(path->dentry); - /* - * Note that backing_inode is the inode of a block device node file, - * not the block device's internal inode. Therefore it is *not* valid - * to use I_BDEV() here; the block device has to be looked up by i_rdev + * Note that d_backing_inode() returns the block device node inode, not + * the block device's internal inode. Therefore it is *not* valid to + * use I_BDEV() here; the block device has to be looked up by i_rdev * instead. 
*/ - bdev = blkdev_get_no_open(backing_inode->i_rdev); + bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false); if (!bdev) return; diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c index e524c609be5066..9c665766479201 100644 --- a/block/bio-integrity-auto.c +++ b/block/bio-integrity-auto.c @@ -9,6 +9,7 @@ * not aware of PI. */ #include +#include #include #include "blk.h" @@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work) bio_endio(bio); } +#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) +static bool bip_should_check(struct bio_integrity_payload *bip) +{ + return bip->bip_flags & BIP_CHECK_FLAGS; +} + +static bool bi_offload_capable(struct blk_integrity *bi) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return bi->tuple_size == sizeof(struct crc64_pi_tuple); + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + return bi->tuple_size == sizeof(struct t10_pi_tuple); + default: + pr_warn_once("%s: unknown integrity checksum type:%d\n", + __func__, bi->csum_type); + fallthrough; + case BLK_INTEGRITY_CSUM_NONE: + return false; + } +} + /** * __bio_integrity_endio - Integrity I/O completion function * @bio: Protected bio @@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work) */ bool __bio_integrity_endio(struct bio *bio) { - struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_data *bid = container_of(bip, struct bio_integrity_data, bip); - if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) { + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && + bip_should_check(bip)) { INIT_WORK(&bid->work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bid->work); return false; @@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct 
bio_integrity_data *bid; + bool set_flags = true; gfp_t gfp = GFP_NOIO; unsigned int len; void *buf; @@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio) switch (bio_op(bio)) { case REQ_OP_READ: - if (bi->flags & BLK_INTEGRITY_NOVERIFY) - return true; + if (bi->flags & BLK_INTEGRITY_NOVERIFY) { + if (bi_offload_capable(bi)) + return true; + set_flags = false; + } break; case REQ_OP_WRITE: - if (bi->flags & BLK_INTEGRITY_NOGENERATE) - return true; - /* * Zero the memory allocated to not leak uninitialized kernel * memory to disk for non-integrity metadata where nothing else * initializes the memory. */ - if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) + if (bi->flags & BLK_INTEGRITY_NOGENERATE) { + if (bi_offload_capable(bi)) + return true; + set_flags = false; + gfp |= __GFP_ZERO; + } else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE) gfp |= __GFP_ZERO; break; default: @@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio) bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(&bid->bip, bio->bi_iter.bi_sector); - if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) - bid->bip.bip_flags |= BIP_IP_CHECKSUM; - if (bi->csum_type) - bid->bip.bip_flags |= BIP_CHECK_GUARD; - if (bi->flags & BLK_INTEGRITY_REF_TAG) - bid->bip.bip_flags |= BIP_CHECK_REFTAG; + if (set_flags) { + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) + bid->bip.bip_flags |= BIP_IP_CHECKSUM; + if (bi->csum_type) + bid->bip.bip_flags |= BIP_CHECK_GUARD; + if (bi->flags & BLK_INTEGRITY_REF_TAG) + bid->bip.bip_flags |= BIP_CHECK_REFTAG; + } if (bio_integrity_add_page(bio, virt_to_page(buf), len, offset_in_page(buf)) < len) goto err_end_io; /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) + if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip)) blk_integrity_generate(bio); else bid->saved_bio_iter = bio->bi_iter; diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 608594a154a5b9..43ef6bd06c85eb 100644 --- a/block/bio-integrity.c +++ 
b/block/bio-integrity.c @@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, } EXPORT_SYMBOL(bio_integrity_alloc); -static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs, - bool dirty) +static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs) { int i; - for (i = 0; i < nr_vecs; i++) { - if (dirty && !PageCompound(bv[i].bv_page)) - set_page_dirty_lock(bv[i].bv_page); + for (i = 0; i < nr_vecs; i++) unpin_user_page(bv[i].bv_page); - } } static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) @@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip) ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter); WARN_ON_ONCE(ret != bytes); - bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true); + bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs); } /** @@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio) return; } - bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, - bio_data_dir(bio) == READ); + bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt); } /** @@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec, } if (write) - bio_integrity_unpin_bvec(bvec, nr_vecs, false); + bio_integrity_unpin_bvec(bvec, nr_vecs); else memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec)); @@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) return 0; release_pages: - bio_integrity_unpin_bvec(bvec, nr_bvecs, false); + bio_integrity_unpin_bvec(bvec, nr_bvecs); free_bvec: if (bvec != stack_vec) kfree(bvec); diff --git a/block/bio.c b/block/bio.c index 4e6c85a33d74db..4be592d37fb666 100644 --- a/block/bio.c +++ b/block/bio.c @@ -611,7 +611,7 @@ struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask) { struct bio *bio; - if (nr_vecs > UIO_MAXIOV) + if (nr_vecs > BIO_MAX_INLINE_VECS) return NULL; return kmalloc(struct_size(bio, bi_inline_vecs, nr_vecs), gfp_mask); } 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 5905f277057bc5..ce93706555c5b9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) return -EINVAL; input = skip_spaces(input); - bdev = blkdev_get_no_open(MKDEV(major, minor)); + bdev = blkdev_get_no_open(MKDEV(major, minor), false); if (!bdev) return -ENODEV; if (bdev_is_partition(bdev)) { diff --git a/block/blk-integrity.c b/block/blk-integrity.c index a1678f0a9f81f9..e4e2567061f9db 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -117,13 +117,8 @@ int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, { int ret; struct iov_iter iter; - unsigned int direction; - if (op_is_write(req_op(rq))) - direction = ITER_DEST; - else - direction = ITER_SOURCE; - iov_iter_ubuf(&iter, direction, ubuf, bytes); + iov_iter_ubuf(&iter, rq_data_dir(rq), ubuf, bytes); ret = bio_integrity_map_user(rq->bio, &iter); if (ret) return ret; diff --git a/block/blk-settings.c b/block/blk-settings.c index 6b2dbe645d23aa..4817e7ca03f83c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, /* * For read-ahead of large files to be effective, we need to read ahead * at least twice the optimal I/O size. + * + * There is no hardware limitation for the read-ahead size and the user + * might have increased the read-ahead size through sysfs, so don't ever + * decrease it. 
*/ - bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES); + bdi->ra_pages = max3(bdi->ra_pages, + lim->io_opt * 2 / PAGE_SIZE, + VM_READAHEAD_PAGES); bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a2882751f0d21c..1f9b45b0b9ee76 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -909,6 +909,8 @@ int blk_register_queue(struct gendisk *disk) out_debugfs_remove: blk_debugfs_remove(disk); mutex_unlock(&q->sysfs_lock); + if (queue_is_mq(q)) + blk_mq_sysfs_unregister(disk); out_put_queue_kobj: kobject_put(&disk->queue_kobj); return ret; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index d6dd2e04787491..7437de947120ed 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -644,6 +644,18 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw, u64 bps_limit = tg_bps_limit(tg, rw); u32 iops_limit = tg_iops_limit(tg, rw); + /* + * If the queue is empty, carryover handling is not needed. In such cases, + * tg->[bytes/io]_disp should be reset to 0 to avoid impacting the dispatch + * of subsequent bios. The same handling applies when the previous BPS/IOPS + * limit was set to max. + */ + if (tg->service_queue.nr_queued[rw] == 0) { + tg->bytes_disp[rw] = 0; + tg->io_disp[rw] = 0; + return; + } + /* * If config is updated while bios are still throttled, calculate and * accumulate how many bytes/ios are waited across changes. 
And @@ -656,8 +668,8 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw, if (iops_limit != UINT_MAX) *ios = calculate_io_allowed(iops_limit, jiffy_elapsed) - tg->io_disp[rw]; - tg->bytes_disp[rw] -= *bytes; - tg->io_disp[rw] -= *ios; + tg->bytes_disp[rw] = -*bytes; + tg->io_disp[rw] = -*ios; } static void tg_update_carryover(struct throtl_grp *tg) @@ -665,10 +677,8 @@ static void tg_update_carryover(struct throtl_grp *tg) long long bytes[2] = {0}; int ios[2] = {0}; - if (tg->service_queue.nr_queued[READ]) - __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]); - if (tg->service_queue.nr_queued[WRITE]) - __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]); + __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]); + __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]); /* see comments in struct throtl_grp for meaning of these fields. */ throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__, diff --git a/block/blk-throttle.h b/block/blk-throttle.h index 7964cc041e0697..f9f8666891abc3 100644 --- a/block/blk-throttle.h +++ b/block/blk-throttle.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_THROTTLE_H #define BLK_THROTTLE_H diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0c77244a35c92e..45c91016cef38a 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, op = REQ_OP_ZONE_RESET; /* Invalidate the page cache, including dirty pages. 
*/ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); ret = blkdev_truncate_zone_range(bdev, mode, &zrange); if (ret) @@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors); fail: - if (cmd == BLKRESETZONE) + if (cmd == BLKRESETZONE) { filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); + } return ret; } @@ -1303,7 +1306,6 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; - submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue @@ -1311,8 +1313,12 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) * path for BIO-based devices will not do that. So drop this extra * reference here. */ - if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) + if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { + bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); + } else { + blk_mq_submit_bio(bio); + } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ diff --git a/block/blk.h b/block/blk.h index 006e3be433d287..594eeba7b9495c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done) wait_for_completion_io(done); } +struct block_device *blkdev_get_no_open(dev_t dev, bool autoload); +void blkdev_put_no_open(struct block_device *bdev); + #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); @@ -477,7 +480,8 @@ static inline void blk_zone_update_request_bio(struct request *rq, * the original BIO sector so that blk_zone_write_plug_bio_endio() can * lookup the zone write plug. 
*/ - if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) + if (req_op(rq) == REQ_OP_ZONE_APPEND || + bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) bio->bi_iter.bi_sector = rq->__sector; } void blk_zone_write_plug_bio_endio(struct bio *bio); diff --git a/block/elevator.c b/block/elevator.c index b4d08026b02cef..6757ed11bf608f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -556,11 +556,23 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL; +#ifdef CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + return elevator_find_get("adios"); +#endif // CONFIG_MQ_IOSCHED_DEFAULT_ADIOS + if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) +#if defined(CONFIG_CACHY) + return elevator_find_get("mq-deadline"); +#else return NULL; +#endif +#if defined(CONFIG_CACHY) && defined(CONFIG_IOSCHED_BFQ) + return elevator_find_get("bfq"); +#else return elevator_find_get("mq-deadline"); +#endif } /* @@ -744,7 +756,6 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, ssize_t elv_iosched_show(struct gendisk *disk, char *name) { struct request_queue *q = disk->queue; - struct elevator_queue *eq = q->elevator; struct elevator_type *cur = NULL, *e; int len = 0; @@ -753,7 +764,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name) len += sprintf(name+len, "[none] "); } else { len += sprintf(name+len, "none "); - cur = eq->type; + cur = q->elevator->type; } spin_lock(&elv_list_lock); diff --git a/block/fops.c b/block/fops.c index be9f1dbea9ce0a..82b672d15ea4f8 100644 --- a/block/fops.c +++ b/block/fops.c @@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (ret) return ret; - bdev = blkdev_get_no_open(inode->i_rdev); + bdev = blkdev_get_no_open(inode->i_rdev, true); if (!bdev) return -ENXIO; @@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = direct_write_fallback(iocb, 
from, ret, blkdev_buffered_write(iocb, from)); } else { + /* + * Take i_rwsem and invalidate_lock to avoid racing with + * set_blocksize changing i_blkbits/folio order and punching + * out the pagecache. + */ + inode_lock_shared(bd_inode); ret = blkdev_buffered_write(iocb, from); + inode_unlock_shared(bd_inode); } if (ret > 0) @@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { + struct inode *bd_inode = bdev_file_inode(iocb->ki_filp); struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); loff_t size = bdev_nr_bytes(bdev); loff_t pos = iocb->ki_pos; @@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; } + /* + * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize + * changing i_blkbits/folio order and punching out the pagecache. + */ + inode_lock_shared(bd_inode); ret = filemap_read(iocb, to, ret); + inode_unlock_shared(bd_inode); reexpand: if (unlikely(shorted)) @@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, if ((start | len) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; + inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); /* @@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, fail: filemap_invalidate_unlock(inode->i_mapping); + inode_unlock(inode); return error; } diff --git a/block/ioctl.c b/block/ioctl.c index faa40f383e2736..e472cc1030c60c 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, if (err) return err; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) @@ -174,6 +175,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 
blk_finish_plug(&plug); fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, end > bdev_nr_bytes(bdev)) return -EINVAL; + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } @@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ + inode_lock(bdev->bd_mapping->host); filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) @@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, fail: filemap_invalidate_unlock(bdev->bd_mapping); + inode_unlock(bdev->bd_mapping->host); return err; } diff --git a/block/ioprio.c b/block/ioprio.c index 73301a261429ff..f0ee2798539c01 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -46,12 +46,8 @@ int ioprio_check_cap(int ioprio) */ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) return -EPERM; - fallthrough; - /* rt has prio field too */ - case IOPRIO_CLASS_BE: - if (level >= IOPRIO_NR_LEVELS) - return -EINVAL; break; + case IOPRIO_CLASS_BE: case IOPRIO_CLASS_IDLE: break; case IOPRIO_CLASS_NONE: diff --git a/crypto/ahash.c b/crypto/ahash.c index 9f57b925b116da..2d9eec2b2b1c6e 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -315,16 +315,7 @@ EXPORT_SYMBOL_GPL(crypto_ahash_setkey); static bool ahash_request_hasvirt(struct ahash_request *req) { - struct ahash_request *r2; - - if (ahash_request_isvirt(req)) - return true; - - list_for_each_entry(r2, &req->base.list, base.list) - if (ahash_request_isvirt(r2)) - return true; - - 
return false; + return ahash_request_isvirt(req); } static int ahash_reqchain_virt(struct ahash_save_req_state *state, @@ -472,7 +463,6 @@ static int ahash_do_req_chain(struct ahash_request *req, bool update = op == crypto_ahash_alg(tfm)->update; struct ahash_save_req_state *state; struct ahash_save_req_state state0; - struct ahash_request *r2; u8 *page = NULL; int err; @@ -509,7 +499,6 @@ static int ahash_do_req_chain(struct ahash_request *req, state->offset = 0; state->nbytes = 0; INIT_LIST_HEAD(&state->head); - list_splice_init(&req->base.list, &state->head); if (page) sg_init_one(&state->sg, page, PAGE_SIZE); @@ -540,9 +529,6 @@ static int ahash_do_req_chain(struct ahash_request *req, out_set_chain: req->base.err = err; - list_for_each_entry(r2, &req->base.list, base.list) - r2->base.err = err; - return err; } @@ -551,19 +537,10 @@ int crypto_ahash_init(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = crypto_shash_init(prepare_shash_desc(req, tfm)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = prepare_shash_desc(r2, tfm); - r2->base.err = crypto_shash_init(desc); - } - return err; } @@ -620,19 +597,10 @@ int crypto_ahash_update(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_update(req, ahash_request_ctx(req)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = shash_ahash_update(r2, desc); - } - return err; } @@ -645,19 +613,10 @@ int crypto_ahash_final(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = crypto_shash_final(ahash_request_ctx(req), req->result); 
req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = crypto_shash_final(desc, r2->result); - } - return err; } @@ -670,19 +629,10 @@ int crypto_ahash_finup(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_finup(req, ahash_request_ctx(req)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = ahash_request_ctx(r2); - r2->base.err = shash_ahash_finup(r2, desc); - } - return err; } @@ -757,19 +707,10 @@ int crypto_ahash_digest(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) { - struct ahash_request *r2; int err; err = shash_ahash_digest(req, prepare_shash_desc(req, tfm)); req->base.err = err; - - list_for_each_entry(r2, &req->base.list, base.list) { - struct shash_desc *desc; - - desc = prepare_shash_desc(r2, tfm); - r2->base.err = shash_ahash_digest(r2, desc); - } - return err; } @@ -1133,20 +1074,5 @@ int ahash_register_instance(struct crypto_template *tmpl, } EXPORT_SYMBOL_GPL(ahash_register_instance); -void ahash_request_free(struct ahash_request *req) -{ - struct ahash_request *tmp; - struct ahash_request *r2; - - if (unlikely(!req)) - return; - - list_for_each_entry_safe(r2, tmp, &req->base.list, base.list) - kfree_sensitive(r2); - - kfree_sensitive(req); -} -EXPORT_SYMBOL_GPL(ahash_request_free); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 5498a87249d3e7..e3f1a4852737b0 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -265,10 +265,6 @@ static int hash_accept(struct socket *sock, struct socket *newsock, goto out_free_state; err = crypto_ahash_import(&ctx2->req, state); - if (err) { - sock_orphan(sk2); - 
sock_put(sk2); - } out_free_state: kfree_sensitive(state); diff --git a/crypto/api.c b/crypto/api.c index 3416e98128a059..8592d3dccc64e6 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -220,10 +220,19 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg, if (crypto_is_test_larval(larval)) crypto_larval_kill(larval); alg = ERR_PTR(-ETIMEDOUT); - } else if (!alg) { + } else if (!alg || PTR_ERR(alg) == -EEXIST) { + int err = alg ? -EEXIST : -EAGAIN; + + /* + * EEXIST is expected because two probes can be scheduled + * at the same time with one using alg_name and the other + * using driver_name. Do a re-lookup but do not retry in + * case we hit a quirk like gcm_base(ctr(aes),...) which + * will never match. + */ alg = &larval->alg; alg = crypto_alg_lookup(alg->cra_name, type, mask) ?: - ERR_PTR(-EAGAIN); + ERR_PTR(err); } else if (IS_ERR(alg)) ; else if (crypto_is_test_larval(larval) && diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index bf165d321440d5..89dc887d2c5c7e 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -188,6 +188,8 @@ static int software_key_query(const struct kernel_pkey_params *params, ptr = pkey_pack_u32(ptr, pkey->paramlen); memcpy(ptr, pkey->params, pkey->paramlen); + memset(info, 0, sizeof(*info)); + if (issig) { sig = crypto_alloc_sig(alg_name, 0, 0); if (IS_ERR(sig)) { @@ -203,6 +205,7 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_sig_keysize(sig); + info->key_size = len; info->max_sig_size = crypto_sig_maxsize(sig); info->max_data_size = crypto_sig_digestsize(sig); @@ -211,6 +214,9 @@ static int software_key_query(const struct kernel_pkey_params *params, info->supported_ops |= KEYCTL_SUPPORTS_SIGN; if (strcmp(params->encoding, "pkcs1") == 0) { + info->max_enc_size = len / BITS_PER_BYTE; + info->max_dec_size = len / BITS_PER_BYTE; + info->supported_ops |= KEYCTL_SUPPORTS_ENCRYPT; if 
(pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; @@ -230,18 +236,17 @@ static int software_key_query(const struct kernel_pkey_params *params, goto error_free_tfm; len = crypto_akcipher_maxsize(tfm); + info->key_size = len * BITS_PER_BYTE; info->max_sig_size = len; info->max_data_size = len; + info->max_enc_size = len; + info->max_dec_size = len; info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT; if (pkey->key_is_private) info->supported_ops |= KEYCTL_SUPPORTS_DECRYPT; } - info->key_size = len * 8; - info->max_enc_size = len; - info->max_dec_size = len; - ret = 0; error_free_tfm: diff --git a/crypto/ecdsa-p1363.c b/crypto/ecdsa-p1363.c index 4454f1f8f33f58..e0c55c64711c83 100644 --- a/crypto/ecdsa-p1363.c +++ b/crypto/ecdsa-p1363.c @@ -21,7 +21,8 @@ static int ecdsa_p1363_verify(struct crypto_sig *tfm, const void *digest, unsigned int dlen) { struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); - unsigned int keylen = crypto_sig_keysize(ctx->child); + unsigned int keylen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); unsigned int ndigits = DIV_ROUND_UP_POW2(keylen, sizeof(u64)); struct ecdsa_raw_sig sig; @@ -45,7 +46,8 @@ static unsigned int ecdsa_p1363_max_size(struct crypto_sig *tfm) { struct ecdsa_p1363_ctx *ctx = crypto_sig_ctx(tfm); - return 2 * crypto_sig_keysize(ctx->child); + return 2 * DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); } static unsigned int ecdsa_p1363_digest_size(struct crypto_sig *tfm) diff --git a/crypto/ecdsa-x962.c b/crypto/ecdsa-x962.c index 90a04f4b9a2f55..ee71594d10a069 100644 --- a/crypto/ecdsa-x962.c +++ b/crypto/ecdsa-x962.c @@ -82,7 +82,7 @@ static int ecdsa_x962_verify(struct crypto_sig *tfm, int err; sig_ctx.ndigits = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), - sizeof(u64)); + sizeof(u64) * BITS_PER_BYTE); err = asn1_ber_decoder(&ecdsasignature_decoder, &sig_ctx, src, slen); if (err < 0) @@ -103,7 +103,8 @@ static unsigned int ecdsa_x962_max_size(struct crypto_sig 
*tfm) { struct ecdsa_x962_ctx *ctx = crypto_sig_ctx(tfm); struct sig_alg *alg = crypto_sig_alg(ctx->child); - int slen = crypto_sig_keysize(ctx->child); + int slen = DIV_ROUND_UP_POW2(crypto_sig_keysize(ctx->child), + BITS_PER_BYTE); /* * Verify takes ECDSA-Sig-Value (described in RFC 5480) as input, diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c index 117526d15ddebf..a70b60a90a3c76 100644 --- a/crypto/ecdsa.c +++ b/crypto/ecdsa.c @@ -167,7 +167,7 @@ static unsigned int ecdsa_key_size(struct crypto_sig *tfm) { struct ecc_ctx *ctx = crypto_sig_ctx(tfm); - return DIV_ROUND_UP(ctx->curve->nbits, 8); + return ctx->curve->nbits; } static unsigned int ecdsa_digest_size(struct crypto_sig *tfm) diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index b3dd8a3ddeb796..2c0602f0cd406f 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -249,7 +249,7 @@ static unsigned int ecrdsa_key_size(struct crypto_sig *tfm) * Verify doesn't need any output, so it's just informational * for keyctl to determine the key bit size. 
*/ - return ctx->pub_key.ndigits * sizeof(u64); + return ctx->pub_key.ndigits * sizeof(u64) * BITS_PER_BYTE; } static unsigned int ecrdsa_max_size(struct crypto_sig *tfm) diff --git a/crypto/krb5/rfc3961_simplified.c b/crypto/krb5/rfc3961_simplified.c index 79180d28baa9fb..e49cbdec7c404d 100644 --- a/crypto/krb5/rfc3961_simplified.c +++ b/crypto/krb5/rfc3961_simplified.c @@ -89,6 +89,7 @@ int crypto_shash_update_sg(struct shash_desc *desc, struct scatterlist *sg, sg_miter_start(&miter, sg, sg_nents(sg), SG_MITER_FROM_SG | SG_MITER_LOCAL); + sg_miter_skip(&miter, offset); for (i = 0; i < len; i += n) { sg_miter_next(&miter); n = min(miter.length, len - i); diff --git a/crypto/lrw.c b/crypto/lrw.c index 391ae0f7641ff9..15f579a768614d 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -322,7 +322,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb) err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); - if (err == -ENOENT) { + if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) { err = -ENAMETOOLONG; if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -356,7 +356,7 @@ static int lrw_create(struct crypto_template *tmpl, struct rtattr **tb) /* Alas we screwed up the naming so we have to mangle the * cipher name. 
*/ - if (!strncmp(cipher_name, "ecb(", 4)) { + if (!memcmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); diff --git a/crypto/rsassa-pkcs1.c b/crypto/rsassa-pkcs1.c index d01ac75635e008..94fa5e9600e79d 100644 --- a/crypto/rsassa-pkcs1.c +++ b/crypto/rsassa-pkcs1.c @@ -301,7 +301,7 @@ static unsigned int rsassa_pkcs1_key_size(struct crypto_sig *tfm) { struct rsassa_pkcs1_ctx *ctx = crypto_sig_ctx(tfm); - return ctx->key_size; + return ctx->key_size * BITS_PER_BYTE; } static int rsassa_pkcs1_set_pub_key(struct crypto_sig *tfm, diff --git a/crypto/scompress.c b/crypto/scompress.c index d435d4b24469d4..ffeedcf20b0f18 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -111,10 +111,14 @@ static void scomp_free_streams(struct scomp_alg *alg) struct crypto_acomp_stream __percpu *stream = alg->stream; int i; + alg->stream = NULL; + if (!stream) + return; + for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); - if (!ps->ctx) + if (IS_ERR_OR_NULL(ps->ctx)) break; alg->free_ctx(ps->ctx); @@ -132,6 +136,8 @@ static int scomp_alloc_streams(struct scomp_alg *alg) if (!stream) return -ENOMEM; + alg->stream = stream; + for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); @@ -143,8 +149,6 @@ static int scomp_alloc_streams(struct scomp_alg *alg) spin_lock_init(&ps->lock); } - - alg->stream = stream; return 0; } @@ -159,11 +163,10 @@ static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) if (ret) goto unlock; } - if (!scomp_scratch_users) { + if (!scomp_scratch_users++) { ret = crypto_scomp_alloc_scratches(); if (ret) - goto unlock; - scomp_scratch_users++; + scomp_scratch_users--; } unlock: mutex_unlock(&scomp_lock); @@ -211,8 +214,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) spage = nth_page(spage, soff / PAGE_SIZE); soff = offset_in_page(soff); - n = slen / PAGE_SIZE; - n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE; + n = (slen - 
1) / PAGE_SIZE; + n += (offset_in_page(slen - 1) + soff) / PAGE_SIZE; if (PageHighMem(nth_page(spage, n)) && size_add(soff, slen) > PAGE_SIZE) break; @@ -239,9 +242,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) dpage = nth_page(dpage, doff / PAGE_SIZE); doff = offset_in_page(doff); - n = dlen / PAGE_SIZE; - n += (offset_in_page(dlen) + doff - 1) / PAGE_SIZE; - if (PageHighMem(dpage + n) && + n = (dlen - 1) / PAGE_SIZE; + n += (offset_in_page(dlen - 1) + doff) / PAGE_SIZE; + if (PageHighMem(nth_page(dpage, n)) && size_add(doff, dlen) > PAGE_SIZE) break; dst = kmap_local_page(dpage) + doff; diff --git a/crypto/sig.c b/crypto/sig.c index dfc7cae9080282..53a3dd6fbe3fe6 100644 --- a/crypto/sig.c +++ b/crypto/sig.c @@ -102,6 +102,11 @@ static int sig_default_set_key(struct crypto_sig *tfm, return -ENOSYS; } +static unsigned int sig_default_size(struct crypto_sig *tfm) +{ + return DIV_ROUND_UP_POW2(crypto_sig_keysize(tfm), BITS_PER_BYTE); +} + static int sig_prepare_alg(struct sig_alg *alg) { struct crypto_alg *base = &alg->base; @@ -117,9 +122,9 @@ static int sig_prepare_alg(struct sig_alg *alg) if (!alg->key_size) return -EINVAL; if (!alg->max_size) - alg->max_size = alg->key_size; + alg->max_size = sig_default_size; if (!alg->digest_size) - alg->digest_size = alg->key_size; + alg->digest_size = sig_default_size; base->cra_type = &crypto_sig_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index abd609d4c8efee..82977ea25db390 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -58,9 +58,6 @@ module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); #endif -/* Multibuffer is unlimited. Set arbitrary limit for testing. 
*/ -#define MAX_MB_MSGS 16 - #ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ @@ -3329,48 +3326,27 @@ static int test_acomp(struct crypto_acomp *tfm, int ctcount, int dtcount) { const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); - struct scatterlist *src = NULL, *dst = NULL; - struct acomp_req *reqs[MAX_MB_MSGS] = {}; - char *decomp_out[MAX_MB_MSGS] = {}; - char *output[MAX_MB_MSGS] = {}; - struct crypto_wait wait; - struct acomp_req *req; - int ret = -ENOMEM; unsigned int i; + char *output, *decomp_out; + int ret; + struct scatterlist src, dst; + struct acomp_req *req; + struct crypto_wait wait; - src = kmalloc_array(MAX_MB_MSGS, sizeof(*src), GFP_KERNEL); - if (!src) - goto out; - dst = kmalloc_array(MAX_MB_MSGS, sizeof(*dst), GFP_KERNEL); - if (!dst) - goto out; - - for (i = 0; i < MAX_MB_MSGS; i++) { - reqs[i] = acomp_request_alloc(tfm); - if (!reqs[i]) - goto out; - - acomp_request_set_callback(reqs[i], - CRYPTO_TFM_REQ_MAY_SLEEP | - CRYPTO_TFM_REQ_MAY_BACKLOG, - crypto_req_done, &wait); - if (i) - acomp_request_chain(reqs[i], reqs[0]); - - output[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!output[i]) - goto out; + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; - decomp_out[i] = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); - if (!decomp_out[i]) - goto out; + decomp_out = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!decomp_out) { + kfree(output); + return -ENOMEM; } for (i = 0; i < ctcount; i++) { unsigned int dlen = COMP_BUF_SIZE; int ilen = ctemplate[i].inlen; void *input_vec; - int j; input_vec = kmemdup(ctemplate[i].input, ilen, GFP_KERNEL); if (!input_vec) { @@ -3378,61 +3354,70 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(dst + j, output[j], dlen); - 
acomp_request_set_params(reqs[j], src, dst + j, ilen, dlen); + req = acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + kfree(input_vec); + ret = -ENOMEM; + goto out; } - req = reqs[0]; + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &wait); + ret = crypto_wait_req(crypto_acomp_compress(req), &wait); if (ret) { pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", i + 1, algo, -ret); kfree(input_vec); + acomp_request_free(req); goto out; } ilen = req->dlen; dlen = COMP_BUF_SIZE; + sg_init_one(&src, output, ilen); + sg_init_one(&dst, decomp_out, dlen); crypto_init_wait(&wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - sg_init_one(src + j, output[j], ilen); - sg_init_one(dst + j, decomp_out[j], dlen); - acomp_request_set_params(reqs[j], src + j, dst + j, ilen, dlen); - } - - crypto_wait_req(crypto_acomp_decompress(req), &wait); - for (j = 0; j < MAX_MB_MSGS; j++) { - ret = reqs[j]->base.err; - if (ret) { - pr_err("alg: acomp: compression failed on test %d (%d) for %s: ret=%d\n", - i + 1, j, algo, -ret); - kfree(input_vec); - goto out; - } + acomp_request_set_params(req, &src, &dst, ilen, dlen); - if (reqs[j]->dlen != ctemplate[i].inlen) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s: output len = %d\n", - i + 1, j, algo, reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + ret = crypto_wait_req(crypto_acomp_decompress(req), &wait); + if (ret) { + pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + kfree(input_vec); + acomp_request_free(req); + goto out; + } - if (memcmp(input_vec, decomp_out[j], reqs[j]->dlen)) { - pr_err("alg: acomp: Compression test %d (%d) failed for %s\n", - i + 1, j, algo); - hexdump(output[j], reqs[j]->dlen); - ret = -EINVAL; - kfree(input_vec); - goto out; - } + if (req->dlen != ctemplate[i].inlen) { + pr_err("alg: 
acomp: Compression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; + } + + if (memcmp(input_vec, decomp_out, req->dlen)) { + pr_err("alg: acomp: Compression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + kfree(input_vec); + acomp_request_free(req); + goto out; } kfree(input_vec); + acomp_request_free(req); } for (i = 0; i < dtcount; i++) { @@ -3446,9 +3431,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } + memset(output, 0, dlen); crypto_init_wait(&wait); - sg_init_one(src, input_vec, ilen); - sg_init_one(dst, output[0], dlen); + sg_init_one(&src, input_vec, ilen); + sg_init_one(&dst, output, dlen); req = acomp_request_alloc(tfm); if (!req) { @@ -3459,7 +3445,7 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - acomp_request_set_params(req, src, dst, ilen, dlen); + acomp_request_set_params(req, &src, &dst, ilen, dlen); acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); @@ -3481,10 +3467,10 @@ static int test_acomp(struct crypto_acomp *tfm, goto out; } - if (memcmp(output[0], dtemplate[i].output, req->dlen)) { + if (memcmp(output, dtemplate[i].output, req->dlen)) { pr_err("alg: acomp: Decompression test %d failed for %s\n", i + 1, algo); - hexdump(output[0], req->dlen); + hexdump(output, req->dlen); ret = -EINVAL; kfree(input_vec); acomp_request_free(req); @@ -3498,13 +3484,8 @@ static int test_acomp(struct crypto_acomp *tfm, ret = 0; out: - acomp_request_free(reqs[0]); - for (i = 0; i < MAX_MB_MSGS; i++) { - kfree(output[i]); - kfree(decomp_out[i]); - } - kfree(dst); - kfree(src); + kfree(decomp_out); + kfree(output); return ret; } diff --git a/crypto/xts.c b/crypto/xts.c index 31529c9ef08f8f..46b7c70ea54bbf 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -363,7 +363,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb) err = 
crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst), cipher_name, 0, mask); - if (err == -ENOENT) { + if (err == -ENOENT && memcmp(cipher_name, "ecb(", 4)) { err = -ENAMETOOLONG; if (snprintf(name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -397,7 +397,7 @@ static int xts_create(struct crypto_template *tmpl, struct rtattr **tb) /* Alas we screwed up the naming so we have to mangle the * cipher name. */ - if (!strncmp(cipher_name, "ecb(", 4)) { + if (!memcmp(cipher_name, "ecb(", 4)) { int len; len = strscpy(name, cipher_name + 4, sizeof(name)); diff --git a/drivers/Makefile b/drivers/Makefile index b5749cf67044ce..5beba9f57254cd 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,14 +64,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb depends on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ -83,6 +77,13 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb depends on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c index bf4219e32cc19d..82412eec9a4b8b 100644 --- a/drivers/accel/amdxdna/aie2_message.c +++ b/drivers/accel/amdxdna/aie2_message.c @@ -525,7 +525,7 @@ aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset, if (!payload) return -EINVAL; - if (!slot_cf_has_space(offset, payload_len)) + if (!slot_has_space(*buf, offset, payload_len)) return -ENOSPC; buf->cu_idx = cu_idx; @@ -558,7 +558,7 @@ 
aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE) return -EINVAL; - if (!slot_dpu_has_space(offset, arg_sz)) + if (!slot_has_space(*buf, offset, arg_sz)) return -ENOSPC; buf->inst_buf_addr = sn->buffer; @@ -569,7 +569,7 @@ aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset, memcpy(buf->args, sn->prop_args, arg_sz); /* Accurate buf size to hint firmware to do necessary copy */ - *size += sizeof(*buf) + arg_sz; + *size = sizeof(*buf) + arg_sz; return 0; } diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h index 4e02e744b470eb..6df9065b13f685 100644 --- a/drivers/accel/amdxdna/aie2_msg_priv.h +++ b/drivers/accel/amdxdna/aie2_msg_priv.h @@ -319,18 +319,16 @@ struct async_event_msg_resp { } __packed; #define MAX_CHAIN_CMDBUF_SIZE SZ_4K -#define slot_cf_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_execbuf_cf, args[0])) +#define slot_has_space(slot, offset, payload_size) \ + (MAX_CHAIN_CMDBUF_SIZE >= (offset) + (payload_size) + \ + sizeof(typeof(slot))) + struct cmd_chain_slot_execbuf_cf { __u32 cu_idx; __u32 arg_cnt; __u32 args[] __counted_by(arg_cnt); }; -#define slot_dpu_has_space(offset, payload_size) \ - (MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \ - offsetof(struct cmd_chain_slot_dpu, args[0])) struct cmd_chain_slot_dpu { __u64 inst_buf_addr; __u32 inst_size; diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6df..f28a060a88109b 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, 
GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 0825851656a274..cd24ccd20ba6cc 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; ivpu_pm_trigger_recovery(vdev, "debugfs"); @@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent) return -EINVAL; ret = ivpu_rpm_get(vdev); - if (ret) + if (ret < 0) return ret; if (active_percent) @@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t if (ret < 0) return ret; - buf[size] = '\0'; + buf[ret] = '\0'; ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period, &process_quantum); if (ret != 4) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 4fa73189502e1a..eff1d3ca075f5a 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include @@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio); + args->value = ivpu_hw_dpu_max_freq_get(vdev); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -421,9 +421,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev) { ivpu_hw_irq_disable(vdev); disable_irq(vdev->irq); - 
cancel_work_sync(&vdev->irq_ipc_work); - cancel_work_sync(&vdev->irq_dct_work); - cancel_work_sync(&vdev->context_abort_work); + flush_work(&vdev->irq_ipc_work); + flush_work(&vdev->irq_dct_work); + flush_work(&vdev->context_abort_work); ivpu_ipc_disable(vdev); ivpu_mmu_disable(vdev); } diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 7a1bb92d8c8161..ccaaf6c100c022 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #include @@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->dvfs_mode = 0; fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr); - fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; - fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS"); + if (fw_hdr->preemption_buffer_1_max_size) + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size; + else + fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size; + + if (fw_hdr->preemption_buffer_2_max_size) + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size; + else + fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size; + ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n", + fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size); + if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address, fw_hdr->ro_section_size, fw_hdr->image_load_address, @@ -534,7 +544,7 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->d0i3_entry_vpu_ts); ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n", boot_params->system_time_us); - ivpu_dbg(vdev, FW_BOOT, "boot_params.power_profile = %u\n", + ivpu_dbg(vdev, FW_BOOT, 
"boot_params.power_profile = 0x%x\n", boot_params->power_profile); } @@ -566,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->magic = VPU_BOOT_PARAMS_MAGIC; boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number; - boot_params->frequency = ivpu_hw_pll_freq_get(vdev); /* * This param is a debug firmware feature. It switches default clock @@ -637,7 +646,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->d0i3_residency_time_us = 0; boot_params->d0i3_entry_vpu_ts = 0; if (IVPU_WA(disable_d0i2)) - boot_params->power_profile = 1; + boot_params->power_profile |= BIT(1); boot_params->system_time_us = ktime_to_us(ktime_get_real()); wmb(); /* Flush WC buffers after writing bootparams */ diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index ec9a3629da3a92..633160470c939f 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -119,7 +119,7 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 16435f2756d024..d79668fe1609f6 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_H__ @@ -82,19 +82,19 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) return range->end - range->start; } -static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) +static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev) { - return ivpu_hw_btrs_ratio_to_freq(vdev, ratio); + return ivpu_hw_btrs_dpu_max_freq_get(vdev); } -static inline void ivpu_hw_irq_clear(struct 
ivpu_device *vdev) +static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev) { - ivpu_hw_ip_irq_clear(vdev); + return ivpu_hw_btrs_dpu_freq_get(vdev); } -static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev) +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) { - return ivpu_hw_btrs_pll_freq_get(vdev); + ivpu_hw_ip_irq_clear(vdev); } static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.c b/drivers/accel/ivpu/ivpu_hw_btrs.c index 56c56012b980fd..b236c7234daabb 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.c +++ b/drivers/accel/ivpu/ivpu_hw_btrs.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ +#include + #include "ivpu_drv.h" #include "ivpu_hw.h" #include "ivpu_hw_btrs.h" @@ -28,17 +30,13 @@ #define BTRS_LNL_ALL_IRQ_MASK ((u32)-1) -#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3) -#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3) -#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0) #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_CONFIG_DEFAULT 0x0 -#define PLL_SIMULATION_FREQ 10000000 -#define PLL_REF_CLK_FREQ 50000000 +#define PLL_REF_CLK_FREQ 50000000ull +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) + #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) #define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define TIMEOUT_US (150 * USEC_PER_MSEC) @@ -62,6 +60,8 @@ #define DCT_ENABLE 0x1 #define DCT_DISABLE 0x0 +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio); + int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev) { 
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK); @@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev) hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH; hw->sku = BTRS_MTL_TILE_SKU_BOTH; - hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO; + hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3); return 0; } @@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable) prepare_wp_request(vdev, &wp, enable); - ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", - PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn); + ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n", + pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn); ret = wp_request_send(vdev, &wp); if (ret) { @@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev) return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); } +static u32 pll_config_get_mtl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); +} + +static u32 pll_config_get_lnl(struct ivpu_device *vdev) +{ + return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); +} + +static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio) +{ + return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3; +} + +static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio) +{ + return PLL_RATIO_TO_FREQ(ratio) / 2; +} + +static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(ratio); + else + return pll_ratio_to_dpu_freq_lnl(ratio); +} + +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev) +{ + return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio); +} + +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev) +{ + if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) + return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev)); + else + return 
pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev)); +} + /* Handler for IRQs from Buttress core (irqB) */ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) { @@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq) if (!status) return false; - if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", - REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL)); + if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_mtl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0)); @@ -633,8 +677,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq) queue_work(system_wq, &vdev->irq_dct_work); } - if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) - ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ)); + if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) { + u32 pll = pll_config_get_lnl(vdev); + + ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz", + pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ); + } if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) { ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n", @@ -717,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val); } -static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config) -{ - u32 pll_clock = PLL_REF_CLK_FREQ * ratio; - u32 cpu_clock; - - if ((config & 0xff) == MTL_PLL_RATIO_4_3) - cpu_clock = pll_clock * 2 / 4; - else - cpu_clock = pll_clock * 2 / 5; - - return cpu_clock; -} - -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio) -{ - 
struct ivpu_hw_info *hw = vdev->hw; - - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_ratio_to_freq_mtl(ratio, hw->config); - else - return PLL_RATIO_TO_FREQ(ratio); -} - -static u32 pll_freq_get_mtl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL); - pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK; - - if (!ivpu_is_silicon(vdev)) - return PLL_SIMULATION_FREQ; - - return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config); -} - -static u32 pll_freq_get_lnl(struct ivpu_device *vdev) -{ - u32 pll_curr_ratio; - - pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ); - pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK; - - return PLL_RATIO_TO_FREQ(pll_curr_ratio); -} - -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev) -{ - if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) - return pll_freq_get_mtl(vdev); - else - return pll_freq_get_lnl(vdev); -} - u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev) { if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL) diff --git a/drivers/accel/ivpu/ivpu_hw_btrs.h b/drivers/accel/ivpu/ivpu_hw_btrs.h index 1fd71b4d4ab01a..d2d82651976d15 100644 --- a/drivers/accel/ivpu/ivpu_hw_btrs.h +++ b/drivers/accel/ivpu/ivpu_hw_btrs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __IVPU_HW_BTRS_H__ @@ -13,9 +13,8 @@ #define PLL_PROFILING_FREQ_DEFAULT 38400000 #define PLL_PROFILING_FREQ_HIGH 400000000 -#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) -#define DCT_DEFAULT_ACTIVE_PERCENT 15u +#define DCT_DEFAULT_ACTIVE_PERCENT 30u #define DCT_PERIOD_US 35300u int ivpu_hw_btrs_info_init(struct ivpu_device *vdev); @@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev); void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev); void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev); void 
ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev); +u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev); bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq); bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq); int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable); void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent); -u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev); -u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio); u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev); u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 0e096fd9b95dd6..39f83225c1815a 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -302,7 +302,8 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req struct ivpu_ipc_consumer cons; int ret; - drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev) && + pm_runtime_enabled(vdev->drm.dev)); ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 004059e4f1e89d..1c8e283ad98542 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -247,6 +247,10 @@ static int ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm if (!cmdq->db_id) return 0; + ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); + if (!ret) + ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); + if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) { ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id); if (!ret) @@ -254,10 +258,6 @@ static int 
ivpu_cmdq_unregister(struct ivpu_file_priv *file_priv, struct ivpu_cm cmdq->id, file_priv->ctx.id); } - ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id); - if (!ret) - ivpu_dbg(vdev, JOB, "DB %d unregistered\n", cmdq->db_id); - xa_erase(&file_priv->vdev->db_xa, cmdq->db_id); cmdq->db_id = 0; @@ -470,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job) struct ivpu_device *vdev = job->vdev; u32 i; - ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d", - job->job_id, job->file_priv->ctx.id, job->engine_idx); + ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx); for (i = 0; i < job->bo_count; i++) if (job->bos[i]) @@ -564,8 +564,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 dma_fence_signal(job->done_fence); trace_job("done", job); - ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", - job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status); ivpu_job_destroy(job); ivpu_stop_job_timeout_detection(vdev); @@ -664,8 +664,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) } trace_job("submit", job); - ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n", - job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority, + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n", + job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority, job->cmd_buf_vpu_addr, cmdq->jobq->header.tail); mutex_unlock(&file_priv->lock); @@ -681,8 +681,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); err_unlock: - 
mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } @@ -777,7 +777,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, goto err_free_handles; } - ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count); + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n", + file_priv->ctx.id, cmdq_id, buffer_count); job = ivpu_job_create(file_priv, engine, buffer_count); if (!job) { @@ -873,15 +874,21 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file * int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_create *args = data; struct ivpu_cmdq *cmdq; + int ret; - if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); @@ -890,6 +897,8 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file * mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return cmdq ? 
0 : -ENOMEM; } @@ -899,28 +908,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_destroy *args = data; struct ivpu_cmdq *cmdq; - u32 cmdq_id; + u32 cmdq_id = 0; int ret; if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); if (!cmdq || cmdq->is_legacy) { ret = -ENOENT; - goto err_unlock; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; } - cmdq_id = cmdq->id; - ivpu_cmdq_destroy(file_priv, cmdq); mutex_unlock(&file_priv->lock); - ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); - return 0; -err_unlock: - mutex_unlock(&file_priv->lock); + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + return ret; } diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c index ffe7b10f8a767b..2a043baf10ca17 100644 --- a/drivers/accel/ivpu/ivpu_ms.c +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -4,6 +4,7 @@ */ #include +#include #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -44,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); if (get_instance_by_mask(file_priv, args->metric_group_mask)) { @@ -96,6 +101,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil kfree(ms); unlock: mutex_unlock(&file_priv->ms_lock); + + ivpu_rpm_put(vdev); return ret; } @@ -160,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + 
return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -187,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file * unlock: mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ret; } @@ -204,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file { struct ivpu_file_priv *file_priv = file->driver_priv; struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_ms_instance *ms; + int ret; if (!args->metric_group_mask) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->ms_lock); ms = get_instance_by_mask(file_priv, args->metric_group_mask); @@ -217,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file mutex_unlock(&file_priv->ms_lock); + ivpu_rpm_put(vdev); return ms ? 0 : -EINVAL; } @@ -281,6 +300,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file * void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) { struct ivpu_ms_instance *ms, *tmp; + struct ivpu_device *vdev = file_priv->vdev; + + pm_runtime_get_sync(vdev->drm.dev); mutex_lock(&file_priv->ms_lock); @@ -293,6 +315,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) free_instance(file_priv, ms); mutex_unlock(&file_priv->ms_lock); + + pm_runtime_put_autosuspend(vdev->drm.dev); } void ivpu_ms_cleanup_all(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index b5891e91f7abaf..ac0e224545961d 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -428,16 +428,17 @@ int ivpu_pm_dct_enable(struct ivpu_device *vdev, u8 active_percent) active_us = (DCT_PERIOD_US * active_percent) / 100; inactive_us = DCT_PERIOD_US - active_us; + vdev->pm->dct_active_percent = active_percent; + + ivpu_dbg(vdev, PM, "DCT requested %u%% (D0: %uus, 
D0i2: %uus)\n", + active_percent, active_us, inactive_us); + ret = ivpu_jsm_dct_enable(vdev, active_us, inactive_us); if (ret) { ivpu_err_ratelimited(vdev, "Failed to enable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = active_percent; - - ivpu_dbg(vdev, PM, "DCT set to %u%% (D0: %uus, D0i2: %uus)\n", - active_percent, active_us, inactive_us); return 0; } @@ -445,15 +446,16 @@ int ivpu_pm_dct_disable(struct ivpu_device *vdev) { int ret; + vdev->pm->dct_active_percent = 0; + + ivpu_dbg(vdev, PM, "DCT requested to be disabled\n"); + ret = ivpu_jsm_dct_disable(vdev); if (ret) { ivpu_err_ratelimited(vdev, "Failed to disable DCT: %d\n", ret); return ret; } - vdev->pm->dct_active_percent = 0; - - ivpu_dbg(vdev, PM, "DCT disabled\n"); return 0; } @@ -466,7 +468,7 @@ void ivpu_pm_irq_dct_work_fn(struct work_struct *work) if (ivpu_hw_btrs_dct_get_request(vdev, &enable)) return; - if (vdev->pm->dct_active_percent) + if (enable) ret = ivpu_pm_dct_enable(vdev, DCT_DEFAULT_ACTIVE_PERCENT); else ret = ivpu_pm_dct_disable(vdev); diff --git a/drivers/accel/ivpu/ivpu_sysfs.c b/drivers/accel/ivpu/ivpu_sysfs.c index 97102feaf8ddca..268ab7744a8bbb 100644 --- a/drivers/accel/ivpu/ivpu_sysfs.c +++ b/drivers/accel/ivpu/ivpu_sysfs.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation */ #include #include +#include +#include #include "ivpu_drv.h" #include "ivpu_gem.h" @@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf) static DEVICE_ATTR_RO(sched_mode); +/** + * DOC: npu_max_frequency_mhz + * + * The npu_max_frequency_mhz shows the maximum frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 
ivpu_hw_dpu_max_freq_get(vdev); + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_max_frequency_mhz); + +/** + * DOC: npu_current_frequency_mhz + * + * The npu_current_frequency_mhz shows current frequency in MHz of the NPU's + * data processing unit + */ +static ssize_t +npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct ivpu_device *vdev = to_ivpu_device(drm); + u32 freq = 0; + + /* Read frequency only if device is active, otherwise frequency is 0 */ + if (pm_runtime_get_if_active(vdev->drm.dev) > 0) { + freq = ivpu_hw_dpu_freq_get(vdev); + + pm_runtime_put_autosuspend(vdev->drm.dev); + } + + return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ); +} + +static DEVICE_ATTR_RO(npu_current_frequency_mhz); + static struct attribute *ivpu_dev_attrs[] = { &dev_attr_npu_busy_time_us.attr, &dev_attr_npu_memory_utilization.attr, &dev_attr_sched_mode.attr, + &dev_attr_npu_max_frequency_mhz.attr, + &dev_attr_npu_current_frequency_mhz.attr, NULL, }; diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 908e68ea1c39c2..218468bbbcadaf 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -26,7 +26,7 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 26 +#define VPU_BOOT_API_VER_MINOR 28 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -76,8 +76,15 @@ struct vpu_firmware_header { * submission queue size and device capabilities. */ u32 preemption_buffer_2_size; + /* + * Maximum preemption buffer size that the FW can use: no need for the host + * driver to allocate more space than that specified by these fields. + * A value of 0 means no declared limit. 
+ */ + u32 preemption_buffer_1_max_size; + u32 preemption_buffer_2_max_size; /* Space reserved for future preemption-related fields. */ - u32 preemption_reserved[6]; + u32 preemption_reserved[4]; /* FW image read only section start address, 4KB aligned */ u64 ro_section_start_address; /* FW image read only section size, 4KB aligned */ @@ -134,7 +141,7 @@ enum vpu_trace_destination { /* * Processor bit shifts (for loggable HW components). */ -#define VPU_TRACE_PROC_BIT_ARM 0 +#define VPU_TRACE_PROC_BIT_RESERVED 0 #define VPU_TRACE_PROC_BIT_LRT 1 #define VPU_TRACE_PROC_BIT_LNN 2 #define VPU_TRACE_PROC_BIT_SHV_0 3 diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 7215c144158cbd..4b6b2b3d2583a8 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,7 +22,7 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 25 +#define VPU_JSM_API_VER_MINOR 29 /* * API header changed (field names, documentation, formatting) but API itself has not been changed @@ -53,8 +53,7 @@ * Engine indexes. */ #define VPU_ENGINE_COMPUTE 0 -#define VPU_ENGINE_COPY 1 -#define VPU_ENGINE_NB 2 +#define VPU_ENGINE_NB 1 /* * VPU status values. @@ -126,11 +125,13 @@ enum { * When set, indicates that job queue uses native fences (as inline commands * in job queue). Such queues may also use legacy fences (as commands in batch buffers). * When cleared, indicates the job queue only uses legacy fences. - * NOTE: For queues using native fences, VPU expects that all jobs in the queue - * are immediately followed by an inline command object. This object is expected - * to be a fence signal command in most cases, but can also be a NOP in case the host - * does not need per-job fence signalling. Other inline commands objects can be - * inserted between "job and inline command" pairs. + * NOTES: + * 1. 
For queues using native fences, VPU expects that all jobs in the queue + * are immediately followed by an inline command object. This object is expected + * to be a fence signal command in most cases, but can also be a NOP in case the host + * does not need per-job fence signalling. Other inline commands objects can be + * inserted between "job and inline command" pairs. + * 2. Native fence queues are only supported on VPU 40xx onwards. */ VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U), @@ -275,6 +276,8 @@ struct vpu_inline_cmd { u64 value; /* User VA of the log buffer in which to add log entry on completion. */ u64 log_buffer_va; + /* NPU private data. */ + u64 npu_private_data; } fence; /* Other commands do not have a payload. */ /* Payload definition for future inline commands can be inserted here. */ @@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update { /** Metric group mask that identifies metric streamer instance. */ u64 metric_group_mask; /** - * Address and size of the buffer where the VPU will write metric data. If - * the buffer address is 0 or same as the currently used buffer the VPU will - * continue writing metric data to the current buffer. In this case the - * buffer size is ignored and the size of the current buffer is unchanged. - * If the address is non-zero and differs from the current buffer address the - * VPU will immediately switch data collection to the new buffer. + * Address and size of the buffer where the VPU will write metric data. + * This member dictates how the update operation should perform: + * 1. client needs information about the number of collected samples and the + * amount of data written to the current buffer + * 2. client wants to switch to a new buffer + * + * Case 1. is identified by the buffer address being 0 or the same as the + * currently used buffer address. In this case the buffer size is ignored and + * the size of the current buffer is unchanged. 
The VPU will return an update + * in the vpu_jsm_metric_streamer_done structure. The internal writing position + * into the buffer is not changed. + * + * Case 2. is identified by the address being non-zero and differs from the + * current buffer address. The VPU will immediately switch data collection to + * the new buffer. Then the VPU will return an update in the + * vpu_jsm_metric_streamer_done structure. */ u64 buffer_addr; u64 buffer_size; @@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* * Default quantum in 100ns units for scheduling across processes * within a priority band + * Minimum value supported by NPU is 1ms (10000 in 100ns units). */ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* @@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { * in situations when it's starved by the focus band. */ u32 normal_band_percentage; - /* Reserved */ - u32 reserved_0; + /* + * TDR timeout value in milliseconds. Default value of 0 meaning no timeout. + */ + u32 tdr_timeout; }; /* @@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { s32 in_process_priority; /* Zero padding / Reserved */ u32 reserved_1; - /* Context quantum relative to other contexts of same priority in the same process */ + /* + * Context quantum relative to other contexts of same priority in the same process + * Minimum value supported by NPU is 1ms (10000 in 100ns units). 
+ */ u64 context_quantum; /* Grace period when preempting context of the same priority within the same process */ u64 grace_period_same_priority; diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h index 911875c5a5f190..58842130ca47bb 100644 --- a/drivers/acpi/acpica/acdebug.h +++ b/drivers/acpi/acpica/acdebug.h @@ -37,7 +37,7 @@ struct acpi_db_argument_info { struct acpi_db_execute_walk { u32 count; u32 max_count; - char name_seg[ACPI_NAMESEG_SIZE + 1]; + char name_seg[ACPI_NAMESEG_SIZE + 1] ACPI_NONSTRING; }; #define PARAM_LIST(pl) pl diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 6f4fe47c955bd0..b40e9a520618e0 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c index 5241f4c01c7655..89a4ac447a2bea 100644 --- a/drivers/acpi/acpica/exserial.c +++ b/drivers/acpi/acpica/exserial.c @@ -201,6 +201,12 @@ acpi_ex_read_serial_bus(union acpi_operand_object *obj_desc, function = ACPI_READ; break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + + buffer_length = ACPI_FFH_INPUT_BUFFER_SIZE; + function = ACPI_READ; + break; + default: return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID); } diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c index d91153f6570053..22aeeeb56cffdb 100644 --- 
a/drivers/acpi/acpica/nsnames.c +++ b/drivers/acpi/acpica/nsnames.c @@ -194,7 +194,7 @@ acpi_ns_build_normalized_path(struct acpi_namespace_node *node, char *full_path, u32 path_size, u8 no_trailing) { u32 length = 0, i; - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; u8 do_no_trailing; char c, *left, *right; struct acpi_namespace_node *next_node; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 1bb7b71f07f1f6..0075fc80d49843 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; acpi_repair_function repair_function; } acpi_repair_info; diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 3cfe7e7475f2fd..070c07d68dfb2f 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -23,6 +23,7 @@ config ACPI_APEI_GHES select ACPI_HED select IRQ_WORK select GENERIC_ALLOCATOR + select ARM_SDE_INTERFACE if ARM64 help Generic Hardware Error Source provides a way to report platform hardware errors (such as that from chipset). 
It diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 289e365f84b249..0f3c663c1b0a33 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1715,7 +1715,7 @@ void __init acpi_ghes_init(void) { int rc; - sdei_init(); + acpi_sdei_init(); if (acpi_disabled) return; diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 90b09840536dde..0a702604018825 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -458,7 +458,7 @@ static void acpi_button_notify(acpi_handle handle, u32 event, void *data) acpi_pm_wakeup_event(&device->dev); button = acpi_driver_data(device); - if (button->suspended) + if (button->suspended || event == ACPI_BUTTON_NOTIFY_WAKE) return; input = button->input; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f193e713825ac2..ff23b6edb2df37 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -463,7 +463,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 8db09d81918fbb..3c5f34892734e9 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2301,6 +2301,34 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "103C_5336AN HP ZHAN 66 Pro"), }, }, + /* + * Lenovo Legion Go S; touchscreen blocks HW sleep when woken up from EC + * https://gitlab.freedesktop.org/drm/amd/-/issues/3929 + */ + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83L3"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83N6"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q2"), + } + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, 
"LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), + } + }, { }, }; diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c index df9328c850bd33..f2c943b934be0a 100644 --- a/drivers/acpi/osi.c +++ b/drivers/acpi/osi.c @@ -42,7 +42,6 @@ static struct acpi_osi_entry osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = { {"Module Device", true}, {"Processor Device", true}, - {"3.0 _SCP Extensions", true}, {"Processor Aggregator Device", true}, }; diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index ffbfd32f4cf1ba..b43f4459a4f61e 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -688,6 +688,9 @@ static int __init platform_profile_init(void) { int err; + if (acpi_disabled) + return -EOPNOTSUPP; + err = class_register(&platform_profile_class); if (err) return err; diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index a35dd0e41c2704..54676e3d82dd59 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -229,18 +229,20 @@ static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, node_entry = ACPI_PTR_DIFF(node, table_hdr); entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, sizeof(struct acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor *); + proc_sz = sizeof(struct acpi_pptt_processor); - while ((unsigned long)entry + proc_sz < table_end) { + /* ignore subtable types that are smaller than a processor node */ + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && cpu_node->parent == node_entry) return 0; if (entry->length == 0) return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, entry->length); - } return 1; } @@ -270,18 +272,21 @@ static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_he table_end = (unsigned long)table_hdr + table_hdr->length; entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, sizeof(struct 
acpi_table_pptt)); - proc_sz = sizeof(struct acpi_pptt_processor *); + proc_sz = sizeof(struct acpi_pptt_processor); /* find the processor structure associated with this cpuid */ - while ((unsigned long)entry + proc_sz < table_end) { + while ((unsigned long)entry + proc_sz <= table_end) { cpu_node = (struct acpi_pptt_processor *)entry; if (entry->length == 0) { pr_warn("Invalid zero length subtable\n"); break; } + /* entry->length may not equal proc_sz, revalidate the processor structure length */ if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && acpi_cpu_id == cpu_node->acpi_processor_id && + (unsigned long)entry + entry->length <= table_end && + entry->length == proc_sz + cpu_node->number_of_priv_resources * sizeof(u32) && acpi_pptt_leaf_node(table_hdr, cpu_node)) { return (struct acpi_pptt_processor *)entry; } diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 14c7bac4100b46..7d59c6c9185fc1 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -534,7 +534,7 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = { */ static const struct dmi_system_id irq1_edge_low_force_override[] = { { - /* MECHREV Jiaolong17KS Series GM7XG0M */ + /* MECHREVO Jiaolong17KS Series GM7XG0M */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "GM7XG0M"), }, diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 0c874186f8aed4..5c2defe55898f1 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -803,6 +803,12 @@ static int acpi_thermal_add(struct acpi_device *device) acpi_thermal_aml_dependency_fix(tz); + /* + * Set the cooling mode [_SCP] to active cooling. This needs to happen before + * we retrieve the trip point values. + */ + acpi_execute_simple_method(tz->device->handle, "_SCP", ACPI_THERMAL_MODE_ACTIVE); + /* Get trip points [_ACi, _PSV, etc.] (required). 
*/ acpi_thermal_get_trip_points(tz); @@ -814,10 +820,6 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; - /* Set the cooling mode [_SCP] to active cooling. */ - acpi_execute_simple_method(tz->device->handle, "_SCP", - ACPI_THERMAL_MODE_ACTIVE); - /* Determine the default polling frequency [_TZP]. */ if (tzp) tz->polling_frequency = tzp; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 76052006bd8714..6be0f7ac7213d1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -79,6 +79,8 @@ static HLIST_HEAD(binder_deferred_list); static DEFINE_MUTEX(binder_deferred_lock); static HLIST_HEAD(binder_devices); +static DEFINE_SPINLOCK(binder_devices_lock); + static HLIST_HEAD(binder_procs); static DEFINE_MUTEX(binder_procs_lock); @@ -5244,6 +5246,7 @@ static void binder_free_proc(struct binder_proc *proc) __func__, proc->outstanding_txns); device = container_of(proc->context, struct binder_device, context); if (refcount_dec_and_test(&device->ref)) { + binder_remove_device(device); kfree(proc->context->name); kfree(device); } @@ -6373,7 +6376,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m, seq_printf(m, " node %d", buffer->target_node->debug_id); seq_printf(m, " size %zd:%zd offset %lx\n", buffer->data_size, buffer->offsets_size, - proc->alloc.vm_start - buffer->user_data); + buffer->user_data - proc->alloc.vm_start); } static void print_binder_work_ilocked(struct seq_file *m, @@ -6929,7 +6932,16 @@ const struct binder_debugfs_entry binder_debugfs_entries[] = { void binder_add_device(struct binder_device *device) { + spin_lock(&binder_devices_lock); hlist_add_head(&device->hlist, &binder_devices); + spin_unlock(&binder_devices_lock); +} + +void binder_remove_device(struct binder_device *device) +{ + spin_lock(&binder_devices_lock); + hlist_del_init(&device->hlist); + spin_unlock(&binder_devices_lock); } static int __init init_binder_device(const char *name) @@ -6956,7 +6968,7 
@@ static int __init init_binder_device(const char *name) return ret; } - hlist_add_head(&binder_device->hlist, &binder_devices); + binder_add_device(binder_device); return ret; } @@ -7018,7 +7030,7 @@ static int __init binder_init(void) err_init_binder_device_failed: hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) { misc_deregister(&device->miscdev); - hlist_del(&device->hlist); + binder_remove_device(device); kfree(device); } diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 6a66c9769c6cd4..1ba5caf1d88d9d 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -583,9 +583,13 @@ struct binder_object { /** * Add a binder device to binder_devices * @device: the new binder device to add to the global list - * - * Not reentrant as the list is not protected by any locks */ void binder_add_device(struct binder_device *device); +/** + * Remove a binder device to binder_devices + * @device: the binder device to remove from the global list + */ +void binder_remove_device(struct binder_device *device); + #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 94c6446604fc95..44d430c4ebefd2 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -274,7 +274,7 @@ static void binderfs_evict_inode(struct inode *inode) mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { - hlist_del_init(&device->hlist); + binder_remove_device(device); kfree(device->context.name); kfree(device); } diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 163ac909bd0689..83a1e8b5443c9e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1629,7 +1629,7 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) } #endif -static void ahci_remap_check(struct pci_dev *pdev, int bar, +static int ahci_remap_check(struct pci_dev *pdev, int bar, struct ahci_host_priv *hpriv) { int i; @@ 
-1642,7 +1642,7 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, pci_resource_len(pdev, bar) < SZ_512K || bar != AHCI_PCI_BAR_STANDARD || !(readl(hpriv->mmio + AHCI_VSCAP) & 1)) - return; + return 0; cap = readq(hpriv->mmio + AHCI_REMAP_CAP); for (i = 0; i < AHCI_MAX_REMAP; i++) { @@ -1657,18 +1657,11 @@ static void ahci_remap_check(struct pci_dev *pdev, int bar, } if (!hpriv->remapped_nvme) - return; - - dev_warn(&pdev->dev, "Found %u remapped NVMe devices.\n", - hpriv->remapped_nvme); - dev_warn(&pdev->dev, - "Switch your BIOS from RAID to AHCI mode to use them.\n"); + return 0; - /* - * Don't rely on the msi-x capability in the remap case, - * share the legacy interrupt across ahci and remapped devices. - */ - hpriv->flags |= AHCI_HFLAG_NO_MSI; + /* Abort probe, allowing intel-nvme-remap to step in when available */ + dev_info(&pdev->dev, "Device will be handled by intel-nvme-remap.\n"); + return -ENODEV; } static int ahci_get_irq_vector(struct ata_host *host, int port) @@ -1912,7 +1905,9 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; /* detect remapped nvme devices */ - ahci_remap_check(pdev, ahci_pci_bar, hpriv); + rc = ahci_remap_check(pdev, ahci_pci_bar, hpriv); + if (rc) + return rc; sysfs_add_file_to_group(&pdev->dev.kobj, &dev_attr_remapped_nvme.attr, diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index ba300cc0a3a327..2e4463d3a3561f 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1510,6 +1510,8 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) unsigned int err_mask, tag; u8 *sense, sk = 0, asc = 0, ascq = 0; u64 sense_valid, val; + u16 extended_sense; + bool aux_icc_valid; int ret = 0; err_mask = ata_read_log_page(dev, ATA_LOG_SENSE_NCQ, 0, buf, 2); @@ -1529,6 +1531,8 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) sense_valid = (u64)buf[8] | ((u64)buf[9] << 8) | ((u64)buf[10] << 16) | ((u64)buf[11] << 24); + extended_sense 
= get_unaligned_le16(&buf[14]); + aux_icc_valid = extended_sense & BIT(15); ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_EH) || @@ -1556,6 +1560,17 @@ int ata_eh_get_ncq_success_sense(struct ata_link *link) continue; } + qc->result_tf.nsect = sense[6]; + qc->result_tf.hob_nsect = sense[7]; + qc->result_tf.lbal = sense[8]; + qc->result_tf.lbam = sense[9]; + qc->result_tf.lbah = sense[10]; + qc->result_tf.hob_lbal = sense[11]; + qc->result_tf.hob_lbam = sense[12]; + qc->result_tf.hob_lbah = sense[13]; + if (aux_icc_valid) + qc->result_tf.auxiliary = get_unaligned_le32(&sense[16]); + /* Set sense without also setting scsicmd->result */ scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE, qc->scsicmd->sense_buffer, sk, diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2796c0da82578a..c0eb8c67a9ff69 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2453,8 +2453,8 @@ static unsigned int ata_msense_control_ata_feature(struct ata_device *dev, */ put_unaligned_be16(ATA_FEATURE_SUB_MPAGE_LEN - 4, &buf[2]); - if (dev->flags & ATA_DFLAG_CDL) - buf[4] = 0x02; /* Support T2A and T2B pages */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + buf[4] = 0x02; /* T2A and T2B pages enabled */ else buf[4] = 0; @@ -3886,12 +3886,11 @@ static int ata_mselect_control_spg0(struct ata_queued_cmd *qc, } /* - * Translate MODE SELECT control mode page, sub-pages f2h (ATA feature mode + * Translate MODE SELECT control mode page, sub-page f2h (ATA feature mode * page) into a SET FEATURES command. 
*/ -static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, - const u8 *buf, int len, - u16 *fp) +static int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, + const u8 *buf, int len, u16 *fp) { struct ata_device *dev = qc->dev; struct ata_taskfile *tf = &qc->tf; @@ -3909,17 +3908,27 @@ static unsigned int ata_mselect_control_ata_feature(struct ata_queued_cmd *qc, /* Check cdl_ctrl */ switch (buf[0] & 0x03) { case 0: - /* Disable CDL */ + /* Disable CDL if it is enabled */ + if (!(dev->flags & ATA_DFLAG_CDL_ENABLED)) + return 0; + ata_dev_dbg(dev, "Disabling CDL\n"); cdl_action = 0; dev->flags &= ~ATA_DFLAG_CDL_ENABLED; break; case 0x02: - /* Enable CDL T2A/T2B: NCQ priority must be disabled */ + /* + * Enable CDL if not already enabled. Since this is mutually + * exclusive with NCQ priority, allow this only if NCQ priority + * is disabled. + */ + if (dev->flags & ATA_DFLAG_CDL_ENABLED) + return 0; if (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLED) { ata_dev_err(dev, "NCQ priority must be disabled to enable CDL\n"); return -EINVAL; } + ata_dev_dbg(dev, "Enabling CDL\n"); cdl_action = 1; dev->flags |= ATA_DFLAG_CDL_ENABLED; break; diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c index 434f380114af09..03dbaf4a13a75c 100644 --- a/drivers/ata/pata_pxa.c +++ b/drivers/ata/pata_pxa.c @@ -223,10 +223,16 @@ static int pxa_ata_probe(struct platform_device *pdev) ap->ioaddr.cmd_addr = devm_ioremap(&pdev->dev, cmd_res->start, resource_size(cmd_res)); + if (!ap->ioaddr.cmd_addr) + return -ENOMEM; ap->ioaddr.ctl_addr = devm_ioremap(&pdev->dev, ctl_res->start, resource_size(ctl_res)); + if (!ap->ioaddr.ctl_addr) + return -ENOMEM; ap->ioaddr.bmdma_addr = devm_ioremap(&pdev->dev, dma_res->start, resource_size(dma_res)); + if (!ap->ioaddr.bmdma_addr) + return -ENOMEM; /* * Adjust register offsets diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index a482741eb181ff..c3042eca6332df 100644 --- a/drivers/ata/sata_sx4.c +++ 
b/drivers/ata/sata_sx4.c @@ -1117,9 +1117,14 @@ static int pdc20621_prog_dimm0(struct ata_host *host) mmio += PDC_CHIP0_OFS; for (i = 0; i < ARRAY_SIZE(pdc_i2c_read_data); i++) - pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, - pdc_i2c_read_data[i].reg, - &spd0[pdc_i2c_read_data[i].ofs]); + if (!pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc_i2c_read_data[i].reg, + &spd0[pdc_i2c_read_data[i].ofs])) { + dev_err(host->dev, + "Failed in i2c read at index %d: device=%#x, reg=%#x\n", + i, PDC_DIMM0_SPD_DEV_ADDRESS, pdc_i2c_read_data[i].reg); + return -EIO; + } data |= (spd0[4] - 8) | ((spd0[21] != 0) << 3) | ((spd0[3]-11) << 4); data |= ((spd0[17] / 4) << 6) | ((spd0[5] / 2) << 7) | @@ -1284,6 +1289,8 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) /* Programming DIMM0 Module Control Register (index_CID0:80h) */ size = pdc20621_prog_dimm0(host); + if (size < 0) + return size; dev_dbg(host->dev, "Local DIMM Size = %dMB\n", size); /* Programming DIMM Module Global Control Register (index_CID0:88h) */ diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index afa4df4c5a3f37..95717d509ca99d 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -156,6 +156,16 @@ * }, * .ops = my_custom_ops, * }; + * + * Please note that such custom ops approach is valid, but it is hard to implement + * it right without global locks per-device to protect from auxiliary_drv removal + * during call to that ops. In addition, this implementation lacks proper module + * dependency, which causes to load/unload races between auxiliary parent and devices + * modules. + * + * The most easiest way to provide these ops reliably without needing to + * have a lock is to EXPORT_SYMBOL*() them and rely on already existing + * modules infrastructure for validity and correct dependencies chains. 
*/ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, diff --git a/drivers/base/base.h b/drivers/base/base.h index 0042e4774b0ce7..123031a757d916 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -73,6 +73,7 @@ static inline void subsys_put(struct subsys_private *sp) kset_put(&sp->subsys); } +struct subsys_private *bus_to_subsys(const struct bus_type *bus); struct subsys_private *class_to_subsys(const struct class *class); struct driver_private { @@ -180,6 +181,22 @@ int driver_add_groups(const struct device_driver *drv, const struct attribute_gr void driver_remove_groups(const struct device_driver *drv, const struct attribute_group **groups); void device_driver_detach(struct device *dev); +static inline void device_set_driver(struct device *dev, const struct device_driver *drv) +{ + /* + * Majority (all?) read accesses to dev->driver happens either + * while holding device lock or in bus/driver code that is only + * invoked when the device is bound to a driver and there is no + * concern of the pointer being changed while it is being read. + * However when reading device's uevent file we read driver pointer + * without taking device lock (so we do not block there for + * arbitrary amount of time). We use WRITE_ONCE() here to prevent + * tearing so that READ_ONCE() can safely be used in uevent code. + */ + // FIXME - this cast should not be needed "soon" + WRITE_ONCE(dev->driver, (struct device_driver *)drv); +} + int devres_release_all(struct device *dev); void device_block_probing(void); void device_unblock_probing(void); diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 5ea3b03af9ba6d..5e75e1bce5516d 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -57,7 +57,7 @@ static int __must_check bus_rescan_devices_helper(struct device *dev, * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. 
*/ -static struct subsys_private *bus_to_subsys(const struct bus_type *bus) +struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; diff --git a/drivers/base/core.c b/drivers/base/core.c index d2f9d3a59d6b00..cbc0099d8ef246 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2624,6 +2624,35 @@ static const char *dev_uevent_name(const struct kobject *kobj) return NULL; } +/* + * Try filling "DRIVER=" uevent variable for a device. Because this + * function may race with binding and unbinding the device from a driver, + * we need to be careful. Binding is generally safe, at worst we miss the + * fact that the device is already bound to a driver (but the driver + * information that is delivered through uevents is best-effort, it may + * become obsolete as soon as it is generated anyways). Unbinding is more + * risky as driver pointer is transitioning to NULL, so READ_ONCE() should + * be used to make sure we are dealing with the same pointer, and to + * ensure that driver structure is not going to disappear from under us + * we take bus' drivers klist lock. The assumption that only registered + * driver can be bound to a device, and to unregister a driver bus code + * will take the same lock. 
+ */ +static void dev_driver_uevent(const struct device *dev, struct kobj_uevent_env *env) +{ + struct subsys_private *sp = bus_to_subsys(dev->bus); + + if (sp) { + scoped_guard(spinlock, &sp->klist_drivers.k_lock) { + struct device_driver *drv = READ_ONCE(dev->driver); + if (drv) + add_uevent_var(env, "DRIVER=%s", drv->name); + } + + subsys_put(sp); + } +} + static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); @@ -2655,8 +2684,8 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Add "DRIVER=%s" variable if the device is bound to a driver */ + dev_driver_uevent(dev, env); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2726,11 +2755,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; @@ -3700,7 +3726,7 @@ int device_add(struct device *dev) device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: - dev->driver = NULL; + device_set_driver(dev, NULL); bus_remove_device(dev); BusError: device_remove_attrs(dev); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index a7e5118498758e..50651435577c8f 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -600,6 +600,7 @@ CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow); CPU_SHOW_VULN_FALLBACK(gds); CPU_SHOW_VULN_FALLBACK(reg_file_data_sampling); CPU_SHOW_VULN_FALLBACK(ghostwrite); +CPU_SHOW_VULN_FALLBACK(indirect_target_selection); static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); @@ -616,6 
+617,7 @@ static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NU static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL); static DEVICE_ATTR(reg_file_data_sampling, 0444, cpu_show_reg_file_data_sampling, NULL); static DEVICE_ATTR(ghostwrite, 0444, cpu_show_ghostwrite, NULL); +static DEVICE_ATTR(indirect_target_selection, 0444, cpu_show_indirect_target_selection, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -633,6 +635,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_gather_data_sampling.attr, &dev_attr_reg_file_data_sampling.attr, &dev_attr_ghostwrite.attr, + &dev_attr_indirect_target_selection.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0e4b4aba885c6..b526e0e0f52d79 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -550,7 +550,7 @@ static void device_unbind_cleanup(struct device *dev) arch_teardown_dma_ops(dev); kfree(dev->dma_range_map); dev->dma_range_map = NULL; - dev->driver = NULL; + device_set_driver(dev, NULL); dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) dev->pm_domain->dismiss(dev); @@ -629,8 +629,7 @@ static int really_probe(struct device *dev, const struct device_driver *drv) } re_probe: - // FIXME - this cast should not be needed "soon" - dev->driver = (struct device_driver *)drv; + device_set_driver(dev, drv); /* If using pinctrl, bind pins now before probing */ ret = pinctrl_bind_pins(dev); @@ -1014,7 +1013,7 @@ static int __device_attach(struct device *dev, bool allow_async) if (ret == 0) ret = 1; else { - dev->driver = NULL; + device_set_driver(dev, NULL); ret = 0; } } else { diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 6dd1a8860f1c92..31bfb3194b4c29 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -296,7 +296,7 @@ static int delete_path(const char *nodepath) return err; } -static int dev_mynode(struct device *dev, struct 
inode *inode, struct kstat *stat) +static int dev_mynode(struct device *dev, struct inode *inode) { /* did we create it */ if (inode->i_private != &thread) @@ -304,13 +304,13 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta /* does the dev_t match */ if (is_blockdev(dev)) { - if (!S_ISBLK(stat->mode)) + if (!S_ISBLK(inode->i_mode)) return 0; } else { - if (!S_ISCHR(stat->mode)) + if (!S_ISCHR(inode->i_mode)) return 0; } - if (stat->rdev != dev->devt) + if (inode->i_rdev != dev->devt) return 0; /* ours */ @@ -321,20 +321,16 @@ static int handle_remove(const char *nodename, struct device *dev) { struct path parent; struct dentry *dentry; - struct kstat stat; - struct path p; + struct inode *inode; int deleted = 0; - int err; + int err = 0; dentry = kern_path_locked(nodename, &parent); if (IS_ERR(dentry)) return PTR_ERR(dentry); - p.mnt = parent.mnt; - p.dentry = dentry; - err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, - AT_STATX_SYNC_AS_STAT); - if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { + inode = d_inode(dentry); + if (dev_mynode(dev, inode)) { struct iattr newattrs; /* * before unlinking this node, reset permissions @@ -342,7 +338,7 @@ static int handle_remove(const char *nodename, struct device *dev) */ newattrs.ia_uid = GLOBAL_ROOT_UID; newattrs.ia_gid = GLOBAL_ROOT_GID; - newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_mode = inode->i_mode & ~0777; newattrs.ia_valid = ATTR_UID|ATTR_GID|ATTR_MODE; inode_lock(d_inode(dentry)); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 8f3a41d9bfaa2c..19469e7f88c25e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -816,21 +816,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state, return 0; } -static int __init add_boot_memory_block(unsigned long base_section_nr) -{ - unsigned long nr; - - for_each_present_section_nr(base_section_nr, nr) { - if (nr >= (base_section_nr + sections_per_block)) - break; - - return 
add_memory_block(memory_block_id(base_section_nr), - MEM_ONLINE, NULL, NULL); - } - - return 0; -} - static int add_hotplug_memory_block(unsigned long block_id, struct vmem_altmap *altmap, struct memory_group *group) @@ -957,7 +942,7 @@ static const struct attribute_group *memory_root_attr_groups[] = { void __init memory_dev_init(void) { int ret; - unsigned long block_sz, nr; + unsigned long block_sz, block_id, nr; /* Validate the configured memory block size */ block_sz = memory_block_size_bytes(); @@ -970,15 +955,23 @@ void __init memory_dev_init(void) panic("%s() failed to register subsystem: %d\n", __func__, ret); /* - * Create entries for memory sections that were found - * during boot and have been initialized + * Create entries for memory sections that were found during boot + * and have been initialized. Use @block_id to track the last + * handled block and initialize it to an invalid value (ULONG_MAX) + * to bypass the block ID matching check for the first present + * block so that it can be covered. 
*/ - for (nr = 0; nr <= __highest_present_section_nr; - nr += sections_per_block) { - ret = add_boot_memory_block(nr); - if (ret) - panic("%s() failed to add memory block: %d\n", __func__, - ret); + block_id = ULONG_MAX; + for_each_present_section_nr(0, nr) { + if (block_id != ULONG_MAX && memory_block_id(nr) == block_id) + continue; + + block_id = memory_block_id(nr); + ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL); + if (ret) { + panic("%s() failed to add memory block: %d\n", + __func__, ret); + } } } diff --git a/drivers/base/module.c b/drivers/base/module.c index 5bc71bea883a06..218aaa0964552f 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -42,16 +42,13 @@ int module_add_driver(struct module *mod, const struct device_driver *drv) if (mod) mk = &mod->mkobj; else if (drv->mod_name) { - struct kobject *mkobj; - - /* Lookup built-in module entry in /sys/modules */ - mkobj = kset_find_obj(module_kset, drv->mod_name); - if (mkobj) { - mk = container_of(mkobj, struct module_kobject, kobj); + /* Lookup or create built-in module entry in /sys/modules */ + mk = lookup_or_create_module_kobject(drv->mod_name); + if (mk) { /* remember our module structure */ drv->p->mkobj = mk; - /* kset_find_obj took a reference */ - kobject_put(mkobj); + /* lookup_or_create_module_kobject took a reference */ + kobject_put(&mk->kobj); } } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 1813cfd0c4bdf4..cfccf3ff36e76e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1440,7 +1440,7 @@ static void platform_shutdown(struct device *_dev) static int platform_dma_configure(struct device *dev) { - struct platform_driver *drv = to_platform_driver(dev->driver); + struct device_driver *drv = READ_ONCE(dev->driver); struct fwnode_handle *fwnode = dev_fwnode(dev); enum dev_dma_attr attr; int ret = 0; @@ -1451,8 +1451,8 @@ static int platform_dma_configure(struct device *dev) attr = acpi_get_dma_attr(to_acpi_device_node(fwnode)); 
ret = acpi_dma_configure(dev, attr); } - /* @drv may not be valid when we're called from the IOMMU layer */ - if (ret || !dev->driver || drv->driver_managed_dma) + /* @dev->driver may not be valid when we're called from the IOMMU layer */ + if (ret || !drv || to_platform_driver(drv)->driver_managed_dma) return ret; ret = iommu_device_use_default_domain(dev); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c8b0a9e29ed843..1926454c7a7e8c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -941,6 +941,8 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) if (!dev->power.is_suspended) goto Complete; + dev->power.is_suspended = false; + if (dev->power.direct_complete) { /* * Allow new children to be added under the device after this @@ -1003,7 +1005,6 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) End: error = dpm_run_callback(callback, dev, state, info); - dev->power.is_suspended = false; device_unlock(dev); dpm_watchdog_clear(&wd); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 0e127b0329c00c..205a4f8828b0ac 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1568,6 +1568,32 @@ void pm_runtime_enable(struct device *dev) } EXPORT_SYMBOL_GPL(pm_runtime_enable); +static void pm_runtime_set_suspended_action(void *data) +{ + pm_runtime_set_suspended(data); +} + +/** + * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable. + * + * @dev: Device to handle. 
+ */ +int devm_pm_runtime_set_active_enabled(struct device *dev) +{ + int err; + + err = pm_runtime_set_active(dev); + if (err) + return err; + + err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); + if (err) + return err; + + return devm_pm_runtime_enable(dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); + static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); @@ -1590,6 +1616,24 @@ int devm_pm_runtime_enable(struct device *dev) } EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); +static void pm_runtime_put_noidle_action(void *data) +{ + pm_runtime_put_noidle(data); +} + +/** + * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. + * + * @dev: Device to handle. + */ +int devm_pm_runtime_get_noresume(struct device *dev) +{ + pm_runtime_get_noresume(dev); + + return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); +} +EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); + /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. 
diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index b1726a3515f6fb..5c78fa6ae77257 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1080,6 +1080,7 @@ void software_node_notify(struct device *dev) if (!swnode) return; + kobject_get(&swnode->kobj); ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node"); if (ret) return; @@ -1089,8 +1090,6 @@ void software_node_notify(struct device *dev) sysfs_remove_link(&dev->kobj, "software_node"); return; } - - kobject_get(&swnode->kobj); } void software_node_notify_remove(struct device *dev) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a97f2c40c640dd..e48b24be45eea7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -367,7 +367,7 @@ config BLK_DEV_RBD tristate "Rados block device (RBD)" depends on INET && BLOCK select CEPH_LIB - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO help @@ -388,12 +388,6 @@ config BLK_DEV_UBLK definition isn't finalized yet, and might change according to future requirement, so mark is as experimental now. - Say Y if you want to get better performance because task_work_add() - can be used in IO path for replacing io_uring cmd, which will become - shared between IO tasks and ubq daemon, meantime task_work_add() can - can handle batch more effectively, but task_work_add() isn't exported - for module, so ublk has to be built to kernel. 
- config BLKDEV_UBLK_LEGACY_OPCODES bool "Support legacy command opcode" depends on BLK_DEV_UBLK diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 292f127cae0abe..02fa8106ef549f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -224,19 +224,22 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size) { - sector_t aligned_sector = (sector + PAGE_SECTORS) & ~PAGE_SECTORS; + sector_t aligned_sector = round_up(sector, PAGE_SECTORS); + sector_t aligned_end = round_down( + sector + (size >> SECTOR_SHIFT), PAGE_SECTORS); struct page *page; - size -= (aligned_sector - sector) * SECTOR_SIZE; + if (aligned_end <= aligned_sector) + return; + xa_lock(&brd->brd_pages); - while (size >= PAGE_SIZE && aligned_sector < rd_size * 2) { + while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) { page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT); if (page) { __free_page(page); brd->brd_nr_pages--; } aligned_sector += PAGE_SECTORS; - size -= PAGE_SIZE; } xa_unlock(&brd->brd_pages); } diff --git a/drivers/block/drbd/Kconfig b/drivers/block/drbd/Kconfig index 6fb4e38fca88c2..495a72da04c6c1 100644 --- a/drivers/block/drbd/Kconfig +++ b/drivers/block/drbd/Kconfig @@ -10,7 +10,7 @@ config BLK_DEV_DRBD tristate "DRBD Distributed Replicated Block Device support" depends on PROC_FS && INET select LRU_CACHE - select LIBCRC32C + select CRC32 help NOTE: In order to authenticate connections you have to select diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 674527d770dc66..f8d136684109aa 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -211,72 +211,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size) kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } -static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) -{ - struct iov_iter i; - ssize_t bw; - - iov_iter_bvec(&i, 
ITER_SOURCE, bvec, 1, bvec->bv_len); - - bw = vfs_iter_write(file, &i, ppos, 0); - - if (likely(bw == bvec->bv_len)) - return 0; - - printk_ratelimited(KERN_ERR - "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)*ppos, bvec->bv_len); - if (bw >= 0) - bw = -EIO; - return bw; -} - -static int lo_write_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - int ret = 0; - - rq_for_each_segment(bvec, rq, iter) { - ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); - if (ret < 0) - break; - cond_resched(); - } - - return ret; -} - -static int lo_read_simple(struct loop_device *lo, struct request *rq, - loff_t pos) -{ - struct bio_vec bvec; - struct req_iterator iter; - struct iov_iter i; - ssize_t len; - - rq_for_each_segment(bvec, rq, iter) { - iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len); - len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); - if (len < 0) - return len; - - flush_dcache_page(bvec.bv_page); - - if (len != bvec.bv_len) { - struct bio *bio; - - __rq_for_each_bio(bio, rq) - zero_fill_bio(bio); - break; - } - cond_resched(); - } - - return 0; -} - static void loop_clear_limits(struct loop_device *lo, int mode) { struct queue_limits lim = queue_limits_start_update(lo->lo_queue); @@ -342,7 +276,7 @@ static void lo_complete_rq(struct request *rq) struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); blk_status_t ret = BLK_STS_OK; - if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || + if (cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || req_op(rq) != REQ_OP_READ) { if (cmd->ret < 0) ret = errno_to_blk_status(cmd->ret); @@ -358,14 +292,13 @@ static void lo_complete_rq(struct request *rq) cmd->ret = 0; blk_mq_requeue_request(rq, true); } else { - if (cmd->use_aio) { - struct bio *bio = rq->bio; + struct bio *bio = rq->bio; - while (bio) { - zero_fill_bio(bio); - bio = bio->bi_next; - } + while (bio) { + zero_fill_bio(bio); + bio = bio->bi_next; } + 
ret = BLK_STS_IOERR; end_io: blk_mq_end_request(rq, ret); @@ -375,11 +308,14 @@ static void lo_complete_rq(struct request *rq) static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); + struct loop_device *lo = rq->q->queuedata; if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; + if (req_op(rq) == REQ_OP_WRITE) + file_end_write(lo->lo_backing_file); if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } @@ -445,20 +381,26 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; - cmd->iocb.ki_complete = lo_rw_aio_complete; - cmd->iocb.ki_flags = IOCB_DIRECT; - cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + cmd->iocb.ki_ioprio = req_get_ioprio(rq); + if (cmd->use_aio) { + cmd->iocb.ki_complete = lo_rw_aio_complete; + cmd->iocb.ki_flags = IOCB_DIRECT; + } else { + cmd->iocb.ki_complete = NULL; + cmd->iocb.ki_flags = 0; + } - if (rw == ITER_SOURCE) + if (rw == ITER_SOURCE) { + file_start_write(lo->lo_backing_file); ret = file->f_op->write_iter(&cmd->iocb, &iter); - else + } else ret = file->f_op->read_iter(&cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); if (ret != -EIOCBQUEUED) lo_rw_aio_complete(&cmd->iocb, ret); - return 0; + return -EIOCBQUEUED; } static int do_req_filebacked(struct loop_device *lo, struct request *rq) @@ -466,15 +408,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; - /* - * lo_write_simple and lo_read_simple should have been covered - * by io submit style function like lo_rw_aio(), one blocker - * is that lo_read_simple() need to call flush_dcache_page after - * the page is written from kernel, and it isn't easy to handle - * this in io submit style function which submits all segments - * of the req at one time. 
And direct read IO doesn't need to - * run flush_dcache_page(). - */ switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); @@ -490,15 +423,9 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) case REQ_OP_DISCARD: return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); - else - return lo_write_simple(lo, rq, pos); + return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); case REQ_OP_READ: - if (cmd->use_aio) - return lo_rw_aio(lo, cmd, pos, ITER_DEST); - else - return lo_read_simple(lo, rq, pos); + return lo_rw_aio(lo, cmd, pos, ITER_DEST); default: WARN_ON_ONCE(1); return -EIO; @@ -582,6 +509,17 @@ static void loop_assign_backing_file(struct loop_device *lo, struct file *file) lo->lo_min_dio_size = loop_query_min_dio_size(lo); } +static int loop_check_backing_file(struct file *file) +{ + if (!file->f_op->read_iter) + return -EINVAL; + + if ((file->f_mode & FMODE_WRITE) && !file->f_op->write_iter) + return -EINVAL; + + return 0; +} + /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up @@ -603,6 +541,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!file) return -EBADF; + error = loop_check_backing_file(file); + if (error) + return error; + /* suppress uevents while reconfiguring the device */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); @@ -662,19 +604,20 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, * dependency. 
*/ fput(old_file); + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); if (partscan) loop_reread_partitions(lo); error = 0; done: - /* enable and uncork uevent now that we are done */ - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); return error; out_err: loop_global_unlock(lo, is_loop); out_putf: fput(file); + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); goto done; } @@ -1039,6 +982,11 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (!file) return -EBADF; + + error = loop_check_backing_file(file); + if (error) + return error; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ @@ -1129,8 +1077,8 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); - /* enable and uncork uevent now that we are done */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); loop_global_unlock(lo, is_loop); if (partscan) @@ -1921,7 +1869,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd) struct loop_device *lo = rq->q->queuedata; int ret = 0; struct mem_cgroup *old_memcg = NULL; - const bool use_aio = cmd->use_aio; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; @@ -1951,7 +1898,7 @@ static void loop_handle_cmd(struct loop_cmd *cmd) } failed: /* complete non-aio request */ - if (!use_aio || ret) { + if (ret != -EIOCBQUEUED) { if (ret == -EOPNOTSUPP) cmd->ret = ret; else diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 3bb9cee0a9b55a..aa163ae9b2aa5c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2031,7 +2031,7 @@ static int null_add_dev(struct nullb_device *dev) nullb->disk->minors = 1; nullb->disk->fops = &null_ops; nullb->disk->private_data = nullb; - strscpy_pad(nullb->disk->disk_name, nullb->disk_name, 
DISK_NAME_LEN); + strscpy(nullb->disk->disk_name, nullb->disk_name); if (nullb->dev->zoned) { rv = null_register_zoned_dev(nullb); diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2fd05c1bd30b03..dc104c025cd568 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -122,15 +122,6 @@ struct ublk_uring_cmd_pdu { */ #define UBLK_IO_FLAG_OWNED_BY_SRV 0x02 -/* - * IO command is aborted, so this flag is set in case of - * !UBLK_IO_FLAG_ACTIVE. - * - * After this flag is observed, any pending or new incoming request - * associated with this io command will be failed immediately - */ -#define UBLK_IO_FLAG_ABORTED 0x04 - /* * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires * get data buffer address from ublksrv. @@ -199,8 +190,6 @@ struct ublk_device { struct completion completion; unsigned int nr_queues_ready; unsigned int nr_privileged_daemon; - - struct work_struct nosrv_work; }; /* header of ublk_params */ @@ -209,18 +198,13 @@ struct ublk_params_header { __u32 types; }; -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq); - +static void ublk_stop_dev_unlocked(struct ublk_device *ub); +static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq); static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset); + const struct ublk_queue *ubq, int tag, size_t offset); static inline unsigned int ublk_req_build_flags(struct request *req); static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, int tag); -static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub) -{ - return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); -} - static inline bool ublk_dev_is_zoned(const struct ublk_device *ub) { return ub->dev_info.flags & UBLK_F_ZONED; @@ -620,14 +604,19 @@ static void ublk_apply_params(struct ublk_device *ub) ublk_dev_param_zoned_apply(ub); } +static inline bool 
ublk_support_zero_copy(const struct ublk_queue *ubq) +{ + return ubq->flags & UBLK_F_SUPPORT_ZERO_COPY; +} + static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) { - return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); + return ubq->flags & UBLK_F_USER_COPY; } static inline bool ublk_need_map_io(const struct ublk_queue *ubq) { - return !ublk_support_user_copy(ubq); + return !ublk_support_user_copy(ubq) && !ublk_support_zero_copy(ubq); } static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) @@ -635,8 +624,11 @@ static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) /* * read()/write() is involved in user copy, so request reference * has to be grabbed + * + * for zero copy, request buffer need to be registered to io_uring + * buffer table, so reference is needed */ - return ublk_support_user_copy(ubq); + return ublk_support_user_copy(ubq) || ublk_support_zero_copy(ubq); } static inline void ublk_init_req_ref(const struct ublk_queue *ubq, @@ -1074,7 +1066,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) { - return ubq->ubq_daemon->flags & PF_EXITING; + return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING; } /* todo: handle partial completion */ @@ -1085,12 +1077,6 @@ static inline void __ublk_complete_rq(struct request *req) unsigned int unmapped_bytes; blk_status_t res = BLK_STS_OK; - /* called from ublk_abort_queue() code path */ - if (io->flags & UBLK_IO_FLAG_ABORTED) { - res = BLK_STS_IOERR; - goto exit; - } - /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) io->res = -EIO; @@ -1140,25 +1126,6 @@ static void ublk_complete_rq(struct kref *ref) __ublk_complete_rq(req); } -/* - * Since ublk_rq_task_work_cb always fails requests immediately during - * exiting, __ublk_fail_req() is only called from abort context during - * exiting. So lock is unnecessary. 
- * - * Also aborting may not be started yet, keep in mind that one failed - * request may be issued by block layer again. - */ -static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, - struct request *req) -{ - WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); - - if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) - blk_mq_requeue_request(req, false); - else - ublk_put_req_ref(ubq, req); -} - static void ubq_complete_io_cmd(struct ublk_io *io, int res, unsigned issue_flags) { @@ -1314,8 +1281,6 @@ static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; - unsigned int nr_inflight = 0; - int i; if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) { if (!ubq->timeout) { @@ -1326,30 +1291,11 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_DONE; } - if (!ubq_daemon_is_dying(ubq)) - return BLK_EH_RESET_TIMER; - - for (i = 0; i < ubq->q_depth; i++) { - struct ublk_io *io = &ubq->ios[i]; - - if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) - nr_inflight++; - } - - /* cancelable uring_cmd can't help us if all commands are in-flight */ - if (nr_inflight == ubq->q_depth) { - struct ublk_device *ub = ubq->dev; - - if (ublk_abort_requests(ub, ubq)) { - schedule_work(&ub->nosrv_work); - } - return BLK_EH_DONE; - } - return BLK_EH_RESET_TIMER; } -static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) +static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq, + bool check_cancel) { blk_status_t res; @@ -1368,7 +1314,7 @@ static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; - if (unlikely(ubq->canceling)) + if (check_cancel && unlikely(ubq->canceling)) return BLK_STS_IOERR; /* fill iod to slot in io cmd buffer */ @@ -1387,7 +1333,7 @@ static blk_status_t 
ublk_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; blk_status_t res; - res = ublk_prep_req(ubq, rq); + res = ublk_prep_req(ubq, rq, false); if (res != BLK_STS_OK) return res; @@ -1419,7 +1365,7 @@ static void ublk_queue_rqs(struct rq_list *rqlist) ublk_queue_cmd_list(ubq, &submit_list); ubq = this_q; - if (ublk_prep_req(ubq, req) == BLK_STS_OK) + if (ublk_prep_req(ubq, req, true) == BLK_STS_OK) rq_list_add_tail(&submit_list, req); else rq_list_add_tail(&requeue_list, req); @@ -1447,6 +1393,37 @@ static const struct blk_mq_ops ublk_mq_ops = { .timeout = ublk_timeout, }; +static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) +{ + int i; + + /* All old ioucmds have to be completed */ + ubq->nr_io_ready = 0; + + /* + * old daemon is PF_EXITING, put it now + * + * It could be NULL in case of closing one quisced device. + */ + if (ubq->ubq_daemon) + put_task_struct(ubq->ubq_daemon); + /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ + ubq->ubq_daemon = NULL; + ubq->timeout = false; + + for (i = 0; i < ubq->q_depth; i++) { + struct ublk_io *io = &ubq->ios[i]; + + /* + * UBLK_IO_FLAG_CANCELED is kept for avoiding to touch + * io->cmd + */ + io->flags &= UBLK_IO_FLAG_CANCELED; + io->cmd = NULL; + io->addr = 0; + } +} + static int ublk_ch_open(struct inode *inode, struct file *filp) { struct ublk_device *ub = container_of(inode->i_cdev, @@ -1458,10 +1435,119 @@ static int ublk_ch_open(struct inode *inode, struct file *filp) return 0; } +static void ublk_reset_ch_dev(struct ublk_device *ub) +{ + int i; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_queue_reinit(ub, ublk_get_queue(ub, i)); + + /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ + ub->mm = NULL; + ub->nr_queues_ready = 0; + ub->nr_privileged_daemon = 0; +} + +static struct gendisk *ublk_get_disk(struct ublk_device *ub) +{ + struct gendisk *disk; + + spin_lock(&ub->lock); + disk = ub->ub_disk; + if (disk) + 
get_device(disk_to_dev(disk)); + spin_unlock(&ub->lock); + + return disk; +} + +static void ublk_put_disk(struct gendisk *disk) +{ + if (disk) + put_device(disk_to_dev(disk)); +} + static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; + struct gendisk *disk; + int i; + + /* + * disk isn't attached yet, either device isn't live, or it has + * been removed already, so we needn't to do anything + */ + disk = ublk_get_disk(ub); + if (!disk) + goto out; + + /* + * All uring_cmd are done now, so abort any request outstanding to + * the ublk server + * + * This can be done in lockless way because ublk server has been + * gone + * + * More importantly, we have to provide forward progress guarantee + * without holding ub->mutex, otherwise control task grabbing + * ub->mutex triggers deadlock + * + * All requests may be inflight, so ->canceling may not be set, set + * it now. + */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + ubq->canceling = true; + ublk_abort_queue(ub, ubq); + } + blk_mq_kick_requeue_list(disk->queue); + + /* + * All infligh requests have been completed or requeued and any new + * request will be failed or requeued via `->canceling` now, so it is + * fine to grab ub->mutex now. + */ + mutex_lock(&ub->mutex); + + /* double check after grabbing lock */ + if (!ub->ub_disk) + goto unlock; + + /* + * Transition the device to the nosrv state. What exactly this + * means depends on the recovery flags + */ + blk_mq_quiesce_queue(disk->queue); + if (ublk_nosrv_should_stop_dev(ub)) { + /* + * Allow any pending/future I/O to pass through quickly + * with an error. 
This is needed because del_gendisk + * waits for all pending I/O to complete + */ + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->force_abort = true; + blk_mq_unquiesce_queue(disk->queue); + ublk_stop_dev_unlocked(ub); + } else { + if (ublk_nosrv_dev_should_queue_io(ub)) { + /* ->canceling is set and all requests are aborted */ + ub->dev_info.state = UBLK_S_DEV_QUIESCED; + } else { + ub->dev_info.state = UBLK_S_DEV_FAIL_IO; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_get_queue(ub, i)->fail_io = true; + } + blk_mq_unquiesce_queue(disk->queue); + } +unlock: + mutex_unlock(&ub->mutex); + ublk_put_disk(disk); + + /* all uring_cmd has been done now, reset device & ubq */ + ublk_reset_ch_dev(ub); +out: clear_bit(UB_STATE_OPEN, &ub->state); return 0; } @@ -1528,10 +1614,26 @@ static void ublk_commit_completion(struct ublk_device *ub, ublk_put_req_ref(ubq, req); } +static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, + struct request *req) +{ + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); + + if (ublk_nosrv_should_reissue_outstanding(ubq->dev)) + blk_mq_requeue_request(req, false); + else { + io->res = -EIO; + __ublk_complete_rq(req); + } +} + /* - * Called from ubq_daemon context via cancel fn, meantime quiesce ublk - * blk-mq queue, so we are called exclusively with blk-mq and ubq_daemon - * context, so everything is serialized. + * Called from ublk char device release handler, when any uring_cmd is + * done, meantime request queue is "quiesced" since all inflight requests + * can't be completed because ublk server is dead. 
+ * + * So no one can hold our request IO reference any more, simply ignore the + * reference, and complete the request immediately */ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) { @@ -1548,46 +1650,29 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) * will do it */ rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); - if (rq && blk_mq_request_started(rq)) { - io->flags |= UBLK_IO_FLAG_ABORTED; + if (rq && blk_mq_request_started(rq)) __ublk_fail_req(ubq, io, rq); - } } } } /* Must be called when queue is frozen */ -static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) +static void ublk_mark_queue_canceling(struct ublk_queue *ubq) { - bool canceled; - spin_lock(&ubq->cancel_lock); - canceled = ubq->canceling; - if (!canceled) + if (!ubq->canceling) ubq->canceling = true; spin_unlock(&ubq->cancel_lock); - - return canceled; } -static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) +static void ublk_start_cancel(struct ublk_queue *ubq) { - bool was_canceled = ubq->canceling; - struct gendisk *disk; - - if (was_canceled) - return false; - - spin_lock(&ub->lock); - disk = ub->ub_disk; - if (disk) - get_device(disk_to_dev(disk)); - spin_unlock(&ub->lock); + struct ublk_device *ub = ubq->dev; + struct gendisk *disk = ublk_get_disk(ub); /* Our disk has been dead */ if (!disk) - return false; - + return; /* * Now we are serialized with ublk_queue_rq() * @@ -1596,25 +1681,36 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq) * touch completed uring_cmd */ blk_mq_quiesce_queue(disk->queue); - was_canceled = ublk_mark_queue_canceling(ubq); - if (!was_canceled) { - /* abort queue is for making forward progress */ - ublk_abort_queue(ub, ubq); - } + ublk_mark_queue_canceling(ubq); blk_mq_unquiesce_queue(disk->queue); - put_device(disk_to_dev(disk)); - - return !was_canceled; + ublk_put_disk(disk); } -static void ublk_cancel_cmd(struct ublk_queue 
*ubq, struct ublk_io *io, +static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag, unsigned int issue_flags) { + struct ublk_io *io = &ubq->ios[tag]; + struct ublk_device *ub = ubq->dev; + struct request *req; bool done; if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) return; + /* + * Don't try to cancel this command if the request is started for + * avoiding race between io_uring_cmd_done() and + * io_uring_cmd_complete_in_task(). + * + * Either the started request will be aborted via __ublk_abort_rq(), + * then this uring_cmd is canceled next time, or it will be done in + * task work function ublk_dispatch_req() because io_uring guarantees + * that ublk_dispatch_req() is always called + */ + req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); + if (req && blk_mq_request_started(req) && req->tag == tag) + return; + spin_lock(&ubq->cancel_lock); done = !!(io->flags & UBLK_IO_FLAG_CANCELED); if (!done) @@ -1628,6 +1724,17 @@ static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, /* * The ublk char device won't be closed when calling cancel fn, so both * ublk device and queue are guaranteed to be live + * + * Two-stage cancel: + * + * - make every active uring_cmd done in ->cancel_fn() + * + * - aborting inflight ublk IO requests in ublk char device release handler, + * which depends on 1st stage because device can only be closed iff all + * uring_cmd are done + * + * Do _not_ try to acquire ub->mutex before all inflight requests are + * aborted, otherwise deadlock may be caused. 
*/ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -1635,9 +1742,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; struct task_struct *task; - struct ublk_device *ub; - bool need_schedule; - struct ublk_io *io; if (WARN_ON_ONCE(!ubq)) return; @@ -1649,16 +1753,11 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd, if (WARN_ON_ONCE(task && task != ubq->ubq_daemon)) return; - ub = ubq->dev; - need_schedule = ublk_abort_requests(ub, ubq); + if (!ubq->canceling) + ublk_start_cancel(ubq); - io = &ubq->ios[pdu->tag]; - WARN_ON_ONCE(io->cmd != cmd); - ublk_cancel_cmd(ubq, io, issue_flags); - - if (need_schedule) { - schedule_work(&ub->nosrv_work); - } + WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd); + ublk_cancel_cmd(ubq, pdu->tag, issue_flags); } static inline bool ublk_queue_ready(struct ublk_queue *ubq) @@ -1671,7 +1770,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) int i; for (i = 0; i < ubq->q_depth; i++) - ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED); + ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED); } /* Cancel all pending commands, must be called after del_gendisk() returns */ @@ -1709,33 +1808,20 @@ static void ublk_wait_tagset_rqs_idle(struct ublk_device *ub) } } -static void __ublk_quiesce_dev(struct ublk_device *ub) -{ - pr_devel("%s: quiesce ub: dev_id %d state %s\n", - __func__, ub->dev_info.dev_id, - ub->dev_info.state == UBLK_S_DEV_LIVE ? 
- "LIVE" : "QUIESCED"); - blk_mq_quiesce_queue(ub->ub_disk->queue); - ublk_wait_tagset_rqs_idle(ub); - ub->dev_info.state = UBLK_S_DEV_QUIESCED; -} - -static void ublk_unquiesce_dev(struct ublk_device *ub) +static void ublk_force_abort_dev(struct ublk_device *ub) { int i; - pr_devel("%s: unquiesce ub: dev_id %d state %s\n", + pr_devel("%s: force abort ub: dev_id %d state %s\n", __func__, ub->dev_info.dev_id, ub->dev_info.state == UBLK_S_DEV_LIVE ? "LIVE" : "QUIESCED"); - /* quiesce_work has run. We let requeued rqs be aborted - * before running fallback_wq. "force_abort" must be seen - * after request queue is unqiuesced. Then del_gendisk() - * can move on. - */ + blk_mq_quiesce_queue(ub->ub_disk->queue); + if (ub->dev_info.state == UBLK_S_DEV_LIVE) + ublk_wait_tagset_rqs_idle(ub); + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) ublk_get_queue(ub, i)->force_abort = true; - blk_mq_unquiesce_queue(ub->ub_disk->queue); /* We may have requeued some rqs in ublk_quiesce_queue() */ blk_mq_kick_requeue_list(ub->ub_disk->queue); @@ -1756,61 +1842,51 @@ static struct gendisk *ublk_detach_disk(struct ublk_device *ub) return disk; } -static void ublk_stop_dev(struct ublk_device *ub) +static void ublk_stop_dev_unlocked(struct ublk_device *ub) + __must_hold(&ub->mutex) { struct gendisk *disk; - mutex_lock(&ub->mutex); if (ub->dev_info.state == UBLK_S_DEV_DEAD) - goto unlock; - if (ublk_nosrv_dev_should_queue_io(ub)) { - if (ub->dev_info.state == UBLK_S_DEV_LIVE) - __ublk_quiesce_dev(ub); - ublk_unquiesce_dev(ub); - } + return; + + if (ublk_nosrv_dev_should_queue_io(ub)) + ublk_force_abort_dev(ub); del_gendisk(ub->ub_disk); disk = ublk_detach_disk(ub); put_disk(disk); - unlock: +} + +static void ublk_stop_dev(struct ublk_device *ub) +{ + mutex_lock(&ub->mutex); + ublk_stop_dev_unlocked(ub); mutex_unlock(&ub->mutex); ublk_cancel_dev(ub); } -static void ublk_nosrv_work(struct work_struct *work) +/* reset ublk io_uring queue & io flags */ +static void ublk_reset_io_flags(struct 
ublk_device *ub) { - struct ublk_device *ub = - container_of(work, struct ublk_device, nosrv_work); - int i; - - if (ublk_nosrv_should_stop_dev(ub)) { - ublk_stop_dev(ub); - return; - } + int i, j; - mutex_lock(&ub->mutex); - if (ub->dev_info.state != UBLK_S_DEV_LIVE) - goto unlock; + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); - if (ublk_nosrv_dev_should_queue_io(ub)) { - __ublk_quiesce_dev(ub); - } else { - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_FAIL_IO; - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { - ublk_get_queue(ub, i)->fail_io = true; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); + /* UBLK_IO_FLAG_CANCELED can be cleared now */ + spin_lock(&ubq->cancel_lock); + for (j = 0; j < ubq->q_depth; j++) + ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED; + spin_unlock(&ubq->cancel_lock); + ubq->canceling = false; + ubq->fail_io = false; } - - unlock: - mutex_unlock(&ub->mutex); - ublk_cancel_dev(ub); } /* device can only be started after all IOs are ready */ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) + __must_hold(&ub->mutex) { - mutex_lock(&ub->mutex); ubq->nr_io_ready++; if (ublk_queue_ready(ubq)) { ubq->ubq_daemon = current; @@ -1820,18 +1896,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) if (capable(CAP_SYS_ADMIN)) ub->nr_privileged_daemon++; } - if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) - complete_all(&ub->completion); - mutex_unlock(&ub->mutex); -} - -static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag) -{ - struct ublk_queue *ubq = ublk_get_queue(ub, q_id); - struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - ublk_queue_cmd(ubq, req); + if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) { + /* now we are ready for handling ublk io request */ + ublk_reset_io_flags(ub); + complete_all(&ub->completion); + } } static inline int 
ublk_check_cmd_op(u32 cmd_op) @@ -1879,13 +1949,20 @@ static void ublk_io_release(void *priv) } static int ublk_register_io_buf(struct io_uring_cmd *cmd, - struct ublk_queue *ubq, unsigned int tag, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { struct ublk_device *ub = cmd->file->private_data; + const struct ublk_io *io = &ubq->ios[tag]; struct request *req; int ret; + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + req = __ublk_check_and_get_req(ub, ubq, tag, 0); if (!req) return -EINVAL; @@ -1901,11 +1978,66 @@ static int ublk_register_io_buf(struct io_uring_cmd *cmd, } static int ublk_unregister_io_buf(struct io_uring_cmd *cmd, + const struct ublk_queue *ubq, unsigned int tag, unsigned int index, unsigned int issue_flags) { + const struct ublk_io *io = &ubq->ios[tag]; + + if (!ublk_support_zero_copy(ubq)) + return -EINVAL; + + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + return -EINVAL; + return io_buffer_unregister_bvec(cmd, index, issue_flags); } +static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq, + struct ublk_io *io, __u64 buf_addr) +{ + struct ublk_device *ub = ubq->dev; + int ret = 0; + + /* + * When handling FETCH command for setting up ublk uring queue, + * ub->mutex is the innermost lock, and we won't block for handling + * FETCH, so it is fine even for IO_URING_F_NONBLOCK. 
+ */ + mutex_lock(&ub->mutex); + /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ + if (ublk_queue_ready(ubq)) { + ret = -EBUSY; + goto out; + } + + /* allow each command to be FETCHed at most once */ + if (io->flags & UBLK_IO_FLAG_ACTIVE) { + ret = -EINVAL; + goto out; + } + + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV); + + if (ublk_need_map_io(ubq)) { + /* + * FETCH_RQ has to provide IO buffer if NEED GET + * DATA is not enabled + */ + if (!buf_addr && !ublk_need_get_data(ubq)) + goto out; + } else if (buf_addr) { + /* User copy requires addr to be unset */ + ret = -EINVAL; + goto out; + } + + ublk_fill_io_cmd(io, cmd, buf_addr); + ublk_mark_io_ready(ub, ubq); +out: + mutex_unlock(&ub->mutex); + return ret; +} + static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, const struct ublksrv_io_cmd *ub_cmd) @@ -1960,35 +2092,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, case UBLK_IO_REGISTER_IO_BUF: return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_UNREGISTER_IO_BUF: - return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags); + return ublk_unregister_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags); case UBLK_IO_FETCH_REQ: - /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ - if (ublk_queue_ready(ubq)) { - ret = -EBUSY; - goto out; - } - /* - * The io is being handled by server, so COMMIT_RQ is expected - * instead of FETCH_REQ - */ - if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) - goto out; - - if (ublk_need_map_io(ubq)) { - /* - * FETCH_RQ has to provide IO buffer if NEED GET - * DATA is not enabled - */ - if (!ub_cmd->addr && !ublk_need_get_data(ubq)) - goto out; - } else if (ub_cmd->addr) { - /* User copy requires addr to be unset */ - ret = -EINVAL; + ret = ublk_fetch(cmd, ubq, io, ub_cmd->addr); + if (ret) goto out; - } - - ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_mark_io_ready(ub, ubq); break; case UBLK_IO_COMMIT_AND_FETCH_REQ: req = 
blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); @@ -2020,8 +2128,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; ublk_fill_io_cmd(io, cmd, ub_cmd->addr); - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); - break; + req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag); + ublk_dispatch_req(ubq, req, issue_flags); + return -EIOCBQUEUED; default: goto out; } @@ -2035,13 +2144,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, } static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub, - struct ublk_queue *ubq, int tag, size_t offset) + const struct ublk_queue *ubq, int tag, size_t offset) { struct request *req; - if (!ublk_need_req_ref(ubq)) - return NULL; - req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag); if (!req) return NULL; @@ -2155,6 +2261,9 @@ static struct request *ublk_check_and_get_req(struct kiocb *iocb, if (!ubq) return ERR_PTR(-EINVAL); + if (!ublk_support_user_copy(ubq)) + return ERR_PTR(-EACCES); + if (tag >= ubq->q_depth) return ERR_PTR(-EINVAL); @@ -2388,7 +2497,6 @@ static void ublk_remove(struct ublk_device *ub) bool unprivileged; ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV; ublk_put_device(ub); @@ -2413,9 +2521,9 @@ static struct ublk_device *ublk_get_device_from_id(int idx) return ub; } -static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) +static int ublk_ctrl_start_dev(struct ublk_device *ub, + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); const struct ublk_param_basic *p = &ub->params.basic; int ublksrv_pid = (int)header->data[0]; struct queue_limits lim = { @@ -2534,9 +2642,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) } static int ublk_ctrl_get_queue_affinity(struct 
ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; cpumask_var_t cpumask; unsigned long queue; @@ -2585,9 +2692,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) info->nr_hw_queues, info->queue_depth); } -static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) +static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublksrv_ctrl_dev_info info; struct ublk_device *ub; @@ -2675,7 +2781,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) goto out_unlock; mutex_init(&ub->mutex); spin_lock_init(&ub->lock); - INIT_WORK(&ub->nosrv_work, ublk_nosrv_work); ret = ublk_alloc_dev_number(ub, header->dev_id); if (ret < 0) @@ -2697,13 +2802,18 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE | UBLK_F_URING_CMD_COMP_IN_TASK; - /* GET_DATA isn't needed any more with USER_COPY */ - if (ublk_dev_is_user_copy(ub)) + /* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */ + if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA; - /* Zoned storage support requires user copy feature */ + /* + * Zoned storage support requires reuse `ublksrv_io_cmd->addr` for + * returning write_append_lba, which is only allowed in case of + * user copy or zero copy + */ if (ublk_dev_is_zoned(ub) && - (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !ublk_dev_is_user_copy(ub))) { + (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags & + (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) { ret = -EINVAL; goto out_free_dev_number; } @@ -2807,14 +2917,12 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) static int 
ublk_ctrl_stop_dev(struct ublk_device *ub) { ublk_stop_dev(ub); - cancel_work_sync(&ub->nosrv_work); return 0; } static int ublk_ctrl_get_dev_info(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) @@ -2843,9 +2951,8 @@ static void ublk_ctrl_fill_params_devt(struct ublk_device *ub) } static int ublk_ctrl_get_params(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret; @@ -2874,9 +2981,8 @@ static int ublk_ctrl_get_params(struct ublk_device *ub, } static int ublk_ctrl_set_params(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_params_header ph; int ret = -EFAULT; @@ -2914,43 +3020,14 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, return ret; } -static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq) -{ - int i; - - WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq))); - - /* All old ioucmds have to be completed */ - ubq->nr_io_ready = 0; - /* old daemon is PF_EXITING, put it now */ - put_task_struct(ubq->ubq_daemon); - /* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */ - ubq->ubq_daemon = NULL; - ubq->timeout = false; - ubq->canceling = false; - - for (i = 0; i < ubq->q_depth; i++) { - struct ublk_io *io = &ubq->ios[i]; - - /* forget everything now and be ready for new FETCH_REQ */ - io->flags = 0; - io->cmd = NULL; - io->addr = 0; - } -} - static int 
ublk_ctrl_start_recovery(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ret = -EINVAL; - int i; mutex_lock(&ub->mutex); if (ublk_nosrv_should_stop_dev(ub)) goto out_unlock; - if (!ub->nr_queues_ready) - goto out_unlock; /* * START_RECOVERY is only allowd after: * @@ -2974,12 +3051,6 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, goto out_unlock; } pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id); - for (i = 0; i < ub->dev_info.nr_hw_queues; i++) - ublk_queue_reinit(ub, ublk_get_queue(ub, i)); - /* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */ - ub->mm = NULL; - ub->nr_queues_ready = 0; - ub->nr_privileged_daemon = 0; init_completion(&ub->completion); ret = 0; out_unlock: @@ -2988,12 +3059,10 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, } static int ublk_ctrl_end_recovery(struct ublk_device *ub, - struct io_uring_cmd *cmd) + const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); int ublksrv_pid = (int)header->data[0]; int ret = -EINVAL; - int i; pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n", __func__, ub->dev_info.nr_hw_queues, header->dev_id); @@ -3013,33 +3082,18 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub, goto out_unlock; } ub->dev_info.ublksrv_pid = ublksrv_pid; + ub->dev_info.state = UBLK_S_DEV_LIVE; pr_devel("%s: new ublksrv_pid %d, dev id %d\n", __func__, ublksrv_pid, header->dev_id); - - if (ublk_nosrv_dev_should_queue_io(ub)) { - ub->dev_info.state = UBLK_S_DEV_LIVE; - blk_mq_unquiesce_queue(ub->ub_disk->queue); - pr_devel("%s: queue unquiesced, dev id %d.\n", - __func__, header->dev_id); - blk_mq_kick_requeue_list(ub->ub_disk->queue); - } else { - blk_mq_quiesce_queue(ub->ub_disk->queue); - ub->dev_info.state = UBLK_S_DEV_LIVE; - for (i = 0; i < 
ub->dev_info.nr_hw_queues; i++) { - ublk_get_queue(ub, i)->fail_io = false; - } - blk_mq_unquiesce_queue(ub->ub_disk->queue); - } - + blk_mq_kick_requeue_list(ub->ub_disk->queue); ret = 0; out_unlock: mutex_unlock(&ub->mutex); return ret; } -static int ublk_ctrl_get_features(struct io_uring_cmd *cmd) +static int ublk_ctrl_get_features(const struct ublksrv_ctrl_cmd *header) { - const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe); void __user *argp = (void __user *)(unsigned long)header->addr; u64 features = UBLK_F_ALL; @@ -3178,7 +3232,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, goto out; if (cmd_op == UBLK_U_CMD_GET_FEATURES) { - ret = ublk_ctrl_get_features(cmd); + ret = ublk_ctrl_get_features(header); goto out; } @@ -3195,17 +3249,17 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, switch (_IOC_NR(cmd_op)) { case UBLK_CMD_START_DEV: - ret = ublk_ctrl_start_dev(ub, cmd); + ret = ublk_ctrl_start_dev(ub, header); break; case UBLK_CMD_STOP_DEV: ret = ublk_ctrl_stop_dev(ub); break; case UBLK_CMD_GET_DEV_INFO: case UBLK_CMD_GET_DEV_INFO2: - ret = ublk_ctrl_get_dev_info(ub, cmd); + ret = ublk_ctrl_get_dev_info(ub, header); break; case UBLK_CMD_ADD_DEV: - ret = ublk_ctrl_add_dev(cmd); + ret = ublk_ctrl_add_dev(header); break; case UBLK_CMD_DEL_DEV: ret = ublk_ctrl_del_dev(&ub, true); @@ -3214,19 +3268,19 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, ret = ublk_ctrl_del_dev(&ub, false); break; case UBLK_CMD_GET_QUEUE_AFFINITY: - ret = ublk_ctrl_get_queue_affinity(ub, cmd); + ret = ublk_ctrl_get_queue_affinity(ub, header); break; case UBLK_CMD_GET_PARAMS: - ret = ublk_ctrl_get_params(ub, cmd); + ret = ublk_ctrl_get_params(ub, header); break; case UBLK_CMD_SET_PARAMS: - ret = ublk_ctrl_set_params(ub, cmd); + ret = ublk_ctrl_set_params(ub, header); break; case UBLK_CMD_START_USER_RECOVERY: - ret = ublk_ctrl_start_recovery(ub, cmd); + ret = ublk_ctrl_start_recovery(ub, header); break; case UBLK_CMD_END_USER_RECOVERY: - 
ret = ublk_ctrl_end_recovery(ub, cmd); + ret = ublk_ctrl_end_recovery(ub, header); break; default: ret = -EOPNOTSUPP; diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 48e2f400957bc9..46d9bbd8e411b3 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2719,7 +2719,7 @@ static int btintel_uefi_get_dsbr(u32 *dsbr_var) } __packed data; efi_status_t status; - unsigned long data_size = 0; + unsigned long data_size = sizeof(data); efi_guid_t guid = EFI_GUID(0xe65d8884, 0xd4af, 0x4b20, 0x8d, 0x03, 0x77, 0x2e, 0xcc, 0x3d, 0xa5, 0x31); @@ -2729,16 +2729,10 @@ static int btintel_uefi_get_dsbr(u32 *dsbr_var) if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return -EOPNOTSUPP; - status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, - NULL); - - if (status != EFI_BUFFER_TOO_SMALL || !data_size) - return -EIO; - status = efi.get_variable(BTINTEL_EFI_DSBR, &guid, NULL, &data_size, &data); - if (status != EFI_SUCCESS) + if (status != EFI_SUCCESS || data_size != sizeof(data)) return -ENXIO; *dsbr_var = data.dsbr; diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index c1e69fcc9c4fac..385e29367dd1df 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -303,8 +303,13 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data) static int btintel_pcie_start_rx(struct btintel_pcie_data *data) { int i, ret; + struct rxq *rxq = &data->rxq; + + /* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to overcome the + * hardware issues leading to race condition at the firmware. + */ - for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) { + for (i = 0; i < rxq->count - 3; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; @@ -957,8 +962,10 @@ static int btintel_pcie_recv_event(struct hci_dev *hdev, struct sk_buff *skb) /* This is a debug event that comes from IML and OP image when it * starts execution. 
There is no need pass this event to stack. */ - if (skb->data[2] == 0x97) + if (skb->data[2] == 0x97) { + hci_recv_diag(hdev, skb); return 0; + } } return hci_recv_frame(hdev, skb); @@ -974,7 +981,6 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, u8 pkt_type; u16 plen; u32 pcie_pkt_type; - struct sk_buff *new_skb; void *pdata; struct hci_dev *hdev = data->hdev; @@ -1051,24 +1057,20 @@ static int btintel_pcie_recv_frame(struct btintel_pcie_data *data, bt_dev_dbg(hdev, "pkt_type: 0x%2.2x len: %u", pkt_type, plen); - new_skb = bt_skb_alloc(plen, GFP_ATOMIC); - if (!new_skb) { - bt_dev_err(hdev, "Failed to allocate memory for skb of len: %u", - skb->len); - ret = -ENOMEM; - goto exit_error; - } - - hci_skb_pkt_type(new_skb) = pkt_type; - skb_put_data(new_skb, skb->data, plen); + hci_skb_pkt_type(skb) = pkt_type; hdev->stat.byte_rx += plen; + skb_trim(skb, plen); if (pcie_pkt_type == BTINTEL_PCIE_HCI_EVT_PKT) - ret = btintel_pcie_recv_event(hdev, new_skb); + ret = btintel_pcie_recv_event(hdev, skb); else - ret = hci_recv_frame(hdev, new_skb); + ret = hci_recv_frame(hdev, skb); + skb = NULL; /* skb is freed in the callee */ exit_error: + if (skb) + kfree_skb(skb); + if (ret) hdev->stat.err_rx++; @@ -1202,8 +1204,6 @@ static void btintel_pcie_rx_work(struct work_struct *work) struct btintel_pcie_data *data = container_of(work, struct btintel_pcie_data, rx_work); struct sk_buff *skb; - int err; - struct hci_dev *hdev = data->hdev; if (test_bit(BTINTEL_PCIE_HWEXP_INPROGRESS, &data->flags)) { /* Unlike usb products, controller will not send hardware @@ -1224,11 +1224,7 @@ static void btintel_pcie_rx_work(struct work_struct *work) /* Process the sk_buf in queue and send to the HCI layer */ while ((skb = skb_dequeue(&data->rx_skb_q))) { - err = btintel_pcie_recv_frame(data, skb); - if (err) - bt_dev_err(hdev, "Failed to send received frame: %d", - err); - kfree_skb(skb); + btintel_pcie_recv_frame(data, skb); } } @@ -1281,10 +1277,8 @@ static void 
btintel_pcie_msix_rx_handle(struct btintel_pcie_data *data) bt_dev_dbg(hdev, "RXQ: cr_hia: %u cr_tia: %u", cr_hia, cr_tia); /* Check CR_TIA and CR_HIA for change */ - if (cr_tia == cr_hia) { - bt_dev_warn(hdev, "RXQ: no new CD found"); + if (cr_tia == cr_hia) return; - } rxq = &data->rxq; @@ -1320,6 +1314,16 @@ static irqreturn_t btintel_pcie_msix_isr(int irq, void *data) return IRQ_WAKE_THREAD; } +static inline bool btintel_pcie_is_rxq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_hia[BTINTEL_PCIE_RXQ_NUM] == data->ia.cr_tia[BTINTEL_PCIE_RXQ_NUM]; +} + +static inline bool btintel_pcie_is_txackq_empty(struct btintel_pcie_data *data) +{ + return data->ia.cr_tia[BTINTEL_PCIE_TXQ_NUM] == data->ia.cr_hia[BTINTEL_PCIE_TXQ_NUM]; +} + static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) { struct msix_entry *entry = dev_id; @@ -1351,12 +1355,18 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_gp0_handler(data); /* For TX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { btintel_pcie_msix_tx_handle(data); + if (!btintel_pcie_is_rxq_empty(data)) + btintel_pcie_msix_rx_handle(data); + } /* For RX */ - if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) + if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_1) { btintel_pcie_msix_rx_handle(data); + if (!btintel_pcie_is_txackq_empty(data)) + btintel_pcie_msix_tx_handle(data); + } /* * Before sending the interrupt the HW disables it to prevent a nested @@ -1659,8 +1669,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd) - + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT; + total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; + total += (sizeof(struct frbd) + sizeof(struct urbd1)) * 
BTINTEL_PCIE_RX_DESCS_COUNT; /* Add the sum of size of index array and size of ci struct */ total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); @@ -1685,36 +1695,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->dma_v_addr = v_addr; /* Setup descriptor count */ - data->txq.count = BTINTEL_DESCS_COUNT; - data->rxq.count = BTINTEL_DESCS_COUNT; + data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT; + data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT; /* Setup tfds */ data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 873178019cad09..a94910ccd5d3c2 100644 --- 
a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -135,8 +135,11 @@ enum btintel_pcie_tlv_type { /* Default interrupt timeout in msec */ #define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000 -/* The number of descriptors in TX/RX queues */ -#define BTINTEL_DESCS_COUNT 16 +/* The number of descriptors in TX queues */ +#define BTINTEL_PCIE_TX_DESCS_COUNT 32 + +/* The number of descriptors in RX queues */ +#define BTINTEL_PCIE_RX_DESCS_COUNT 64 /* Number of Queue for TX and RX * It indicates the index of the IA(Index Array) @@ -158,9 +161,6 @@ enum { /* Doorbell vector for TFD */ #define BTINTEL_PCIE_TX_DB_VEC 0 -/* Number of pending RX requests for downlink */ -#define BTINTEL_PCIE_RX_MAX_QUEUE 6 - /* Doorbell vector for FRBD */ #define BTINTEL_PCIE_RX_DB_VEC 513 diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index edd5eead1e93b0..1d26207b2ba70a 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_dev *hdev) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + return 0; + sdio_claim_host(bdev->func); /* Disable interrupt */ @@ -1443,11 +1447,15 @@ static void btmtksdio_remove(struct sdio_func *func) if (!bdev) return; + hdev = bdev->hdev; + + /* Make sure to call btmtksdio_close before removing sdio card */ + if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) + btmtksdio_close(hdev); + /* Be consistent the state in btmtksdio_probe */ pm_runtime_get_noresume(bdev->dev); - hdev = bdev->hdev; - sdio_set_drvdata(func, NULL); hci_unregister_dev(hdev); hci_free_dev(hdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 5091dea762a073..604ab2bba231c5 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1286,7 +1286,9 @@ static void nxp_coredump(struct 
hci_dev *hdev) u8 pcmd = 2; skb = nxp_drv_send_cmd(hdev, HCI_NXP_TRIGGER_DUMP, 1, &pcmd); - if (!IS_ERR(skb)) + if (IS_ERR(skb)) + bt_dev_err(hdev, "Failed to trigger FW Dump. (%ld)", PTR_ERR(skb)); + else kfree_skb(skb); } @@ -1445,9 +1447,6 @@ static int nxp_shutdown(struct hci_dev *hdev) /* HCI_NXP_IND_RESET command may not returns any response */ if (!IS_ERR(skb)) kfree_skb(skb); - } else if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { - nxpdev->new_baudrate = nxpdev->fw_init_baudrate; - nxp_set_baudrate_cmd(hdev, NULL); } return 0; @@ -1799,13 +1798,15 @@ static void nxp_serdev_remove(struct serdev_device *serdev) clear_bit(BTNXPUART_FW_DOWNLOADING, &nxpdev->tx_state); wake_up_interruptible(&nxpdev->check_boot_sign_wait_q); wake_up_interruptible(&nxpdev->fw_dnld_done_wait_q); - } - - if (test_bit(HCI_RUNNING, &hdev->flags)) { - /* Ensure shutdown callback is executed before unregistering, so - * that baudrate is reset to initial value. + } else { + /* Restore FW baudrate to fw_init_baudrate if changed. + * This will ensure FW baudrate is in sync with + * driver baudrate in case this driver is re-inserted. 
*/ - nxp_shutdown(hdev); + if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { + nxpdev->new_baudrate = nxpdev->fw_init_baudrate; + nxp_set_baudrate_cmd(hdev, NULL); + } } ps_cleanup(nxpdev); diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 3d6778b95e0058..edefb9dc76aa1a 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -889,7 +889,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_T) variant = "t"; else if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_S) - variant = "u"; + variant = "s"; snprintf(config.fwname, sizeof(config.fwname), "qca/cmnv%02x%s.bin", rom_ver, variant); diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index d3eba0d4a57d3b..7838c89e529e0c 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1215,6 +1215,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, rtl_dev_err(hdev, "mandatory config file %s not found", btrtl_dev->ic_info->cfg_name); ret = btrtl_dev->cfg_len; + if (!ret) + ret = -EINVAL; goto err_free; } } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5012b5ff92c8a3..256b451bbe065f 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3010,55 +3010,27 @@ static void btusb_coredump_qca(struct hci_dev *hdev) bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err); } -/* - * ==0: not a dump pkt. - * < 0: fails to handle a dump pkt - * > 0: otherwise. - */ +/* Return: 0 on success, negative errno on failure. 
*/ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - int ret = 1; + int ret = 0; + unsigned int skip = 0; u8 pkt_type; - u8 *sk_ptr; - unsigned int sk_len; u16 seqno; u32 dump_size; - struct hci_event_hdr *event_hdr; - struct hci_acl_hdr *acl_hdr; struct qca_dump_hdr *dump_hdr; struct btusb_data *btdata = hci_get_drvdata(hdev); struct usb_device *udev = btdata->udev; pkt_type = hci_skb_pkt_type(skb); - sk_ptr = skb->data; - sk_len = skb->len; - - if (pkt_type == HCI_ACLDATA_PKT) { - acl_hdr = hci_acl_hdr(skb); - if (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE) - return 0; - sk_ptr += HCI_ACL_HDR_SIZE; - sk_len -= HCI_ACL_HDR_SIZE; - event_hdr = (struct hci_event_hdr *)sk_ptr; - } else { - event_hdr = hci_event_hdr(skb); - } - - if ((event_hdr->evt != HCI_VENDOR_PKT) - || (event_hdr->plen != (sk_len - HCI_EVENT_HDR_SIZE))) - return 0; - - sk_ptr += HCI_EVENT_HDR_SIZE; - sk_len -= HCI_EVENT_HDR_SIZE; + skip = sizeof(struct hci_event_hdr); + if (pkt_type == HCI_ACLDATA_PKT) + skip += sizeof(struct hci_acl_hdr); - dump_hdr = (struct qca_dump_hdr *)sk_ptr; - if ((sk_len < offsetof(struct qca_dump_hdr, data)) - || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) - || (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) - return 0; + skb_pull(skb, skip); + dump_hdr = (struct qca_dump_hdr *)skb->data; - /*it is dump pkt now*/ seqno = le16_to_cpu(dump_hdr->seqno); if (seqno == 0) { set_bit(BTUSB_HW_SSR_ACTIVE, &btdata->flags); @@ -3078,16 +3050,15 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) btdata->qca_dump.ram_dump_size = dump_size; btdata->qca_dump.ram_dump_seqno = 0; - sk_ptr += offsetof(struct qca_dump_hdr, data0); - sk_len -= offsetof(struct qca_dump_hdr, data0); + + skb_pull(skb, offsetof(struct qca_dump_hdr, data0)); usb_disable_autosuspend(udev); bt_dev_info(hdev, "%s memdump size(%u)\n", (pkt_type == HCI_ACLDATA_PKT) ? 
"ACL" : "event", dump_size); } else { - sk_ptr += offsetof(struct qca_dump_hdr, data); - sk_len -= offsetof(struct qca_dump_hdr, data); + skb_pull(skb, offsetof(struct qca_dump_hdr, data)); } if (!btdata->qca_dump.ram_dump_size) { @@ -3107,7 +3078,6 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } - skb_pull(skb, skb->len - sk_len); hci_devcd_append(hdev, skb); btdata->qca_dump.ram_dump_seqno++; if (seqno == QCA_LAST_SEQUENCE_NUM) { @@ -3132,17 +3102,74 @@ static int handle_dump_pkt_qca(struct hci_dev *hdev, struct sk_buff *skb) return ret; } +/* Return: true if the ACL packet is a dump packet, false otherwise. */ +static bool acl_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_event_hdr *event_hdr; + struct hci_acl_hdr *acl_hdr; + struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; + + if (!clone) + return false; + + acl_hdr = skb_pull_data(clone, sizeof(*acl_hdr)); + if (!acl_hdr || (le16_to_cpu(acl_hdr->handle) != QCA_MEMDUMP_ACL_HANDLE)) + goto out; + + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; + + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; + + is_dump = true; +out: + consume_skb(clone); + return is_dump; +} + +/* Return: true if the event packet is a dump packet, false otherwise. 
*/ +static bool evt_pkt_is_dump_qca(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_event_hdr *event_hdr; + struct qca_dump_hdr *dump_hdr; + struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); + bool is_dump = false; + + if (!clone) + return false; + + event_hdr = skb_pull_data(clone, sizeof(*event_hdr)); + if (!event_hdr || (event_hdr->evt != HCI_VENDOR_PKT)) + goto out; + + dump_hdr = skb_pull_data(clone, sizeof(*dump_hdr)); + if (!dump_hdr || (dump_hdr->vse_class != QCA_MEMDUMP_VSE_CLASS) || + (dump_hdr->msg_type != QCA_MEMDUMP_MSG_TYPE)) + goto out; + + is_dump = true; +out: + consume_skb(clone); + return is_dump; +} + static int btusb_recv_acl_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (acl_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } static int btusb_recv_evt_qca(struct hci_dev *hdev, struct sk_buff *skb) { - if (handle_dump_pkt_qca(hdev, skb)) - return 0; + if (evt_pkt_is_dump_qca(hdev, skb)) + return handle_dump_pkt_qca(hdev, skb); return hci_recv_frame(hdev, skb); } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index e00590ba24fdbb..a2dc39c005f4f8 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2415,14 +2415,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (IS_ERR(qcadev->bt_en) && - (data->soc_type == QCA_WCN6750 || - data->soc_type == QCA_WCN6855)) { - dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n"); - return PTR_ERR(qcadev->bt_en); - } + if (IS_ERR(qcadev->bt_en)) + return dev_err_probe(&serdev->dev, + PTR_ERR(qcadev->bt_en), + "failed to acquire BT_EN gpio\n"); - if (!qcadev->bt_en) + if (!qcadev->bt_en && + (data->soc_type == QCA_WCN6750 || + data->soc_type == QCA_WCN6855)) power_ctrl_enabled = false; qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, 
"swctrl", diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index a51935d37e5d71..59f4d7bdffdcb5 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -289,18 +289,18 @@ static void vhci_coredump(struct hci_dev *hdev) static void vhci_coredump_hdr(struct hci_dev *hdev, struct sk_buff *skb) { - char buf[80]; + const char *buf; - snprintf(buf, sizeof(buf), "Controller Name: vhci_ctrl\n"); + buf = "Controller Name: vhci_ctrl\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Firmware Version: vhci_fw\n"); + buf = "Firmware Version: vhci_fw\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Driver: vhci_drv\n"); + buf = "Driver: vhci_drv\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Vendor: vhci\n"); + buf = "Vendor: vhci\n"; skb_put_data(skb, buf, strlen(buf)); } diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index a8be8cf246fb6f..7671bd15854551 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -139,9 +139,9 @@ static int fsl_mc_bus_uevent(const struct device *dev, struct kobj_uevent_env *e static int fsl_mc_dma_configure(struct device *dev) { + const struct device_driver *drv = READ_ONCE(dev->driver); struct device *dma_dev = dev; struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); - struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); u32 input_id = mc_dev->icid; int ret; @@ -153,8 +153,8 @@ static int fsl_mc_dma_configure(struct device *dev) else ret = acpi_dma_configure_id(dev, DEV_DMA_COHERENT, &input_id); - /* @mc_drv may not be valid when we're called from the IOMMU layer */ - if (!ret && dev->driver && !mc_drv->driver_managed_dma) { + /* @drv may not be valid when we're called from the IOMMU layer */ + if (!ret && drv && !to_fsl_mc_driver(drv)->driver_managed_dma) { ret = iommu_device_use_default_domain(dev); if (ret) arch_teardown_dma_ops(dev); @@ -906,8 +906,10 @@ int 
fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, error_cleanup_dev: kfree(mc_dev->regions); - kfree(mc_bus); - kfree(mc_dev); + if (mc_bus) + kfree(mc_bus); + else + kfree(mc_dev); return error; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 8fb33c90482f79..ae61967605563c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -404,7 +404,7 @@ config TELCLOCK configuration of the telecom clock configuration settings. This device is used for hardware synchronization across the ATCA backplane fabric. Upon loading, the driver exports a sysfs directory, - /sys/devices/platform/telco_clock, with a number of files for + /sys/devices/faux/telco_clock, with a number of files for controlling the behavior of this hardware. source "drivers/s390/char/Kconfig" diff --git a/drivers/char/misc.c b/drivers/char/misc.c index f7dd455dd0dd3c..dda466f9181acf 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -315,7 +315,7 @@ static int __init misc_init(void) goto fail_remove; err = -EIO; - if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) + if (__register_chrdev(MISC_MAJOR, 0, MINORMASK + 1, "misc", &misc_fops)) goto fail_printk; return 0; diff --git a/drivers/char/tpm/tpm-buf.c b/drivers/char/tpm/tpm-buf.c index e49a19fea3bdf6..dc882fc9fa9efc 100644 --- a/drivers/char/tpm/tpm-buf.c +++ b/drivers/char/tpm/tpm-buf.c @@ -201,7 +201,7 @@ static void tpm_buf_read(struct tpm_buf *buf, off_t *offset, size_t count, void */ u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset) { - u8 value; + u8 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u8); */ u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset) { - u16 value; + u16 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); @@ -235,7 +235,7 @@ EXPORT_SYMBOL_GPL(tpm_buf_read_u16); */ u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset) { - u32 value; + u32 value = 0; tpm_buf_read(buf, offset, sizeof(value), &value); diff 
--git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c index 3f89635ba5e855..7b5049b3d476ef 100644 --- a/drivers/char/tpm/tpm2-sessions.c +++ b/drivers/char/tpm/tpm2-sessions.c @@ -40,11 +40,6 @@ * * These are the usage functions: * - * tpm2_start_auth_session() which allocates the opaque auth structure - * and gets a session from the TPM. This must be called before - * any of the following functions. The session is protected by a - * session_key which is derived from a random salt value - * encrypted to the NULL seed. * tpm2_end_auth_session() kills the session and frees the resources. * Under normal operation this function is done by * tpm_buf_check_hmac_response(), so this is only to be used on @@ -963,16 +958,13 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key) } /** - * tpm2_start_auth_session() - create a HMAC authentication session with the TPM - * @chip: the TPM chip structure to create the session with + * tpm2_start_auth_session() - Create an HMAC authentication session + * @chip: A TPM chip * - * This function loads the NULL seed from its saved context and starts - * an authentication session on the null seed, fills in the - * @chip->auth structure to contain all the session details necessary - * for performing the HMAC, encrypt and decrypt operations and - * returns. The NULL seed is flushed before this function returns. + * Loads the ephemeral key (null seed), and starts an HMAC authenticated + * session. The null seed is flushed before the return. * - * Return: zero on success or actual error encountered. + * Returns zero on success, or a POSIX error code. 
*/ int tpm2_start_auth_session(struct tpm_chip *chip) { @@ -1024,7 +1016,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip) /* hash algorithm for session */ tpm_buf_append_u16(&buf, TPM_ALG_SHA256); - rc = tpm_transmit_cmd(chip, &buf, 0, "start auth session"); + rc = tpm_ret_to_err(tpm_transmit_cmd(chip, &buf, 0, "StartAuthSession")); tpm2_flush_context(chip, null_key); if (rc == TPM2_RC_SUCCESS) diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h index 970d02c337c7f1..6c3aa480396b64 100644 --- a/drivers/char/tpm/tpm_tis_core.h +++ b/drivers/char/tpm/tpm_tis_core.h @@ -54,7 +54,7 @@ enum tis_int_flags { enum tis_defaults { TIS_MEM_LEN = 0x5000, TIS_SHORT_TIMEOUT = 750, /* ms */ - TIS_LONG_TIMEOUT = 2000, /* 2 sec */ + TIS_LONG_TIMEOUT = 4000, /* 4 secs */ TIS_TIMEOUT_MIN_ATML = 14700, /* usecs */ TIS_TIMEOUT_MAX_ATML = 15000, /* usecs */ }; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 5f04951d0dd469..088182e54debd6 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1576,8 +1576,8 @@ static void handle_control_message(struct virtio_device *vdev, break; case VIRTIO_CONSOLE_RESIZE: { struct { - __u16 rows; - __u16 cols; + __virtio16 cols; + __virtio16 rows; } size; if (!is_console_port(port)) @@ -1585,7 +1585,8 @@ static void handle_control_message(struct virtio_device *vdev, memcpy(&size, buf->buf + buf->offset + sizeof(*cpkt), sizeof(size)); - set_console_size(port, size.rows, size.cols); + set_console_size(port, virtio16_to_cpu(vdev, size.rows), + virtio16_to_cpu(vdev, size.cols)); port->cons.hvc->irq_requested = 1; resize_console(port); diff --git a/drivers/clk/bcm/clk-raspberrypi.c b/drivers/clk/bcm/clk-raspberrypi.c index 0e1fe3759530a4..720acc10f8aa45 100644 --- a/drivers/clk/bcm/clk-raspberrypi.c +++ b/drivers/clk/bcm/clk-raspberrypi.c @@ -286,6 +286,8 @@ static struct clk_hw *raspberrypi_clk_register(struct raspberrypi_clk *rpi, init.name = 
devm_kasprintf(rpi->dev, GFP_KERNEL, "fw-clk-%s", rpi_firmware_clk_names[id]); + if (!init.name) + return ERR_PTR(-ENOMEM); init.ops = &raspberrypi_firmware_clk_ops; init.flags = CLK_GET_RATE_NOCACHE; diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index 014db638662407..8ddf3a9a53dfd5 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -137,6 +137,8 @@ static int s2mps11_clk_probe(struct platform_device *pdev) if (!clk_data) return -ENOMEM; + clk_data->num = S2MPS11_CLKS_NUM; + switch (hwid) { case S2MPS11X: s2mps11_reg = S2MPS11_REG_RTC_CTRL; @@ -186,7 +188,6 @@ static int s2mps11_clk_probe(struct platform_device *pdev) clk_data->hws[i] = &s2mps11_clks[i].hw; } - clk_data->num = S2MPS11_CLKS_NUM; of_clk_add_hw_provider(s2mps11_clks->clk_np, of_clk_hw_onecell_get, clk_data); diff --git a/drivers/clk/qcom/camcc-sm6350.c b/drivers/clk/qcom/camcc-sm6350.c index 1871970fb046d7..8aac97d29ce3ff 100644 --- a/drivers/clk/qcom/camcc-sm6350.c +++ b/drivers/clk/qcom/camcc-sm6350.c @@ -1695,6 +1695,9 @@ static struct clk_branch camcc_sys_tmr_clk = { static struct gdsc bps_gdsc = { .gdscr = 0x6004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "bps_gdsc", }, @@ -1704,6 +1707,9 @@ static struct gdsc bps_gdsc = { static struct gdsc ipe_0_gdsc = { .gdscr = 0x7004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ipe_0_gdsc", }, @@ -1713,6 +1719,9 @@ static struct gdsc ipe_0_gdsc = { static struct gdsc ife_0_gdsc = { .gdscr = 0x9004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_0_gdsc", }, @@ -1721,6 +1730,9 @@ static struct gdsc ife_0_gdsc = { static struct gdsc ife_1_gdsc = { .gdscr = 0xa004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_1_gdsc", }, @@ -1729,6 +1741,9 @@ static struct gdsc ife_1_gdsc = { static struct gdsc ife_2_gdsc 
= { .gdscr = 0xb004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ife_2_gdsc", }, @@ -1737,6 +1752,9 @@ static struct gdsc ife_2_gdsc = { static struct gdsc titan_top_gdsc = { .gdscr = 0x14004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "titan_top_gdsc", }, diff --git a/drivers/clk/qcom/dispcc-sm6350.c b/drivers/clk/qcom/dispcc-sm6350.c index e703ecf00e4404..b0bd163a449ccd 100644 --- a/drivers/clk/qcom/dispcc-sm6350.c +++ b/drivers/clk/qcom/dispcc-sm6350.c @@ -681,6 +681,9 @@ static struct clk_branch disp_cc_xo_clk = { static struct gdsc mdss_gdsc = { .gdscr = 0x1004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "mdss_gdsc", }, diff --git a/drivers/clk/qcom/gcc-msm8939.c b/drivers/clk/qcom/gcc-msm8939.c index 7431c9a65044f8..45193b3d714bab 100644 --- a/drivers/clk/qcom/gcc-msm8939.c +++ b/drivers/clk/qcom/gcc-msm8939.c @@ -432,7 +432,7 @@ static const struct parent_map gcc_xo_gpll0_gpll1a_gpll6_sleep_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, { P_GPLL1_AUX, 2 }, - { P_GPLL6, 2 }, + { P_GPLL6, 3 }, { P_SLEEP_CLK, 6 }, }; @@ -1113,7 +1113,7 @@ static struct clk_rcg2 jpeg0_clk_src = { }; static const struct freq_tbl ftbl_gcc_camss_mclk0_1_clk[] = { - F(24000000, P_GPLL0, 1, 1, 45), + F(24000000, P_GPLL6, 1, 1, 45), F(66670000, P_GPLL0, 12, 0, 0), { } }; diff --git a/drivers/clk/qcom/gcc-sm6350.c b/drivers/clk/qcom/gcc-sm6350.c index 74346dc026068a..a4d6dff9d0f7f1 100644 --- a/drivers/clk/qcom/gcc-sm6350.c +++ b/drivers/clk/qcom/gcc-sm6350.c @@ -2320,6 +2320,9 @@ static struct clk_branch gcc_video_xo_clk = { static struct gdsc usb30_prim_gdsc = { .gdscr = 0x1a004, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "usb30_prim_gdsc", }, @@ -2328,6 +2331,9 @@ static struct gdsc usb30_prim_gdsc = { static struct gdsc ufs_phy_gdsc = { .gdscr = 0x3a004, + .en_rest_wait_val = 0x2, 
+ .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0xf, .pd = { .name = "ufs_phy_gdsc", }, diff --git a/drivers/clk/qcom/gpucc-sm6350.c b/drivers/clk/qcom/gpucc-sm6350.c index 35ed0500bc5931..ee89c42413f885 100644 --- a/drivers/clk/qcom/gpucc-sm6350.c +++ b/drivers/clk/qcom/gpucc-sm6350.c @@ -413,6 +413,9 @@ static struct clk_branch gpu_cc_gx_vsense_clk = { static struct gdsc gpu_cx_gdsc = { .gdscr = 0x106c, .gds_hw_ctrl = 0x1540, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0x8, .pd = { .name = "gpu_cx_gdsc", }, @@ -423,6 +426,9 @@ static struct gdsc gpu_cx_gdsc = { static struct gdsc gpu_gx_gdsc = { .gdscr = 0x100c, .clamp_io_ctrl = 0x1508, + .en_rest_wait_val = 0x2, + .en_few_wait_val = 0x2, + .clk_dis_wait_val = 0x2, .pd = { .name = "gpu_gx_gdsc", .power_on = gdsc_gx_do_nothing_enable, diff --git a/drivers/clk/rockchip/clk-rk3576.c b/drivers/clk/rockchip/clk-rk3576.c index 595e010341f73a..be703f250197af 100644 --- a/drivers/clk/rockchip/clk-rk3576.c +++ b/drivers/clk/rockchip/clk-rk3576.c @@ -541,6 +541,8 @@ static struct rockchip_clk_branch rk3576_clk_branches[] __initdata = { RK3576_CLKGATE_CON(5), 14, GFLAGS), GATE(CLK_OTPC_AUTO_RD_G, "clk_otpc_auto_rd_g", "xin24m", 0, RK3576_CLKGATE_CON(5), 15, GFLAGS), + GATE(CLK_OTP_PHY_G, "clk_otp_phy_g", "xin24m", 0, + RK3576_CLKGATE_CON(6), 0, GFLAGS), COMPOSITE(CLK_MIPI_CAMERAOUT_M0, "clk_mipi_cameraout_m0", mux_24m_spll_gpll_cpll_p, 0, RK3576_CLKSEL_CON(38), 8, 2, MFLAGS, 0, 8, DFLAGS, RK3576_CLKGATE_CON(6), 3, GFLAGS), diff --git a/drivers/clk/samsung/clk-exynosautov920.c b/drivers/clk/samsung/clk-exynosautov920.c index dc8d4240f6defc..b0561faecfeb1b 100644 --- a/drivers/clk/samsung/clk-exynosautov920.c +++ b/drivers/clk/samsung/clk-exynosautov920.c @@ -1393,7 +1393,7 @@ static const unsigned long hsi1_clk_regs[] __initconst = { /* List of parent clocks for Muxes in CMU_HSI1 */ PNAME(mout_hsi1_mmc_card_user_p) = {"oscclk", "dout_clkcmu_hsi1_mmc_card"}; PNAME(mout_hsi1_noc_user_p) = { 
"oscclk", "dout_clkcmu_hsi1_noc" }; -PNAME(mout_hsi1_usbdrd_user_p) = { "oscclk", "mout_clkcmu_hsi1_usbdrd" }; +PNAME(mout_hsi1_usbdrd_user_p) = { "oscclk", "dout_clkcmu_hsi1_usbdrd" }; PNAME(mout_hsi1_usbdrd_p) = { "dout_tcxo_div2", "mout_hsi1_usbdrd_user" }; static const struct samsung_mux_clock hsi1_mux_clks[] __initconst = { diff --git a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c index bb66c906ebbb62..e83d4fd40240fa 100644 --- a/drivers/clk/sunxi-ng/ccu-sun20i-d1.c +++ b/drivers/clk/sunxi-ng/ccu-sun20i-d1.c @@ -412,19 +412,23 @@ static const struct clk_parent_data mmc0_mmc1_parents[] = { { .hw = &pll_periph0_2x_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc0_mmc1_parents, 0x830, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); - -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc0_mmc1_parents, 0x834, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc0_clk, "mmc0", + mmc0_mmc1_parents, 0x830, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); + +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc1_clk, "mmc1", + mmc0_mmc1_parents, 0x834, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 2, /* post-div */ + 0); static const struct clk_parent_data mmc2_parents[] = { { .fw_name = "hosc" }, @@ -433,12 +437,14 @@ static const struct clk_parent_data mmc2_parents[] = { { .hw = &pll_periph0_800M_clk.common.hw }, { .hw = &pll_audio1_div2_clk.common.hw }, }; -static SUNXI_CCU_MP_DATA_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc2_parents, 0x838, - 0, 4, /* M */ - 8, 2, /* P */ - 24, 3, /* mux */ - BIT(31), /* gate */ - 0); +static SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(mmc2_clk, "mmc2", mmc2_parents, + 0x838, + 0, 4, /* M */ + 8, 2, /* P */ + 24, 3, /* mux */ + BIT(31), /* gate */ + 
2, /* post-div */ + 0); static SUNXI_CCU_GATE_HWS(bus_mmc0_clk, "bus-mmc0", psi_ahb_hws, 0x84c, BIT(0), 0); diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h index b35aeec70484d6..bb09c649bfa356 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.h +++ b/drivers/clk/sunxi-ng/ccu_mp.h @@ -52,6 +52,28 @@ struct ccu_mp { } \ } +#define SUNXI_CCU_MP_DATA_WITH_MUX_GATE_POSTDIV(_struct, _name, _parents, \ + _reg, \ + _mshift, _mwidth, \ + _pshift, _pwidth, \ + _muxshift, _muxwidth, \ + _gate, _postdiv, _flags)\ + struct ccu_mp _struct = { \ + .enable = _gate, \ + .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ + .p = _SUNXI_CCU_DIV(_pshift, _pwidth), \ + .mux = _SUNXI_CCU_MUX(_muxshift, _muxwidth), \ + .fixed_post_div = _postdiv, \ + .common = { \ + .reg = _reg, \ + .features = CCU_FEATURE_FIXED_POSTDIV, \ + .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, \ + _parents, \ + &ccu_mp_ops, \ + _flags), \ + } \ + } + #define SUNXI_CCU_MP_WITH_MUX_GATE(_struct, _name, _parents, _reg, \ _mshift, _mwidth, \ _pshift, _pwidth, \ @@ -109,8 +131,7 @@ struct ccu_mp { _mshift, _mwidth, \ _pshift, _pwidth, \ _muxshift, _muxwidth, \ - _gate, _features, \ - _flags) \ + _gate, _flags, _features) \ struct ccu_mp _struct = { \ .enable = _gate, \ .m = _SUNXI_CCU_DIV(_mshift, _mwidth), \ diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c index 39f7c2d736d169..b603c25f3dfaac 100644 --- a/drivers/clocksource/i8253.c +++ b/drivers/clocksource/i8253.c @@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void) #ifdef CONFIG_CLKEVT_I8253 void clockevent_i8253_disable(void) { - raw_spin_lock(&i8253_lock); + guard(raw_spinlock_irqsave)(&i8253_lock); /* * Writing the MODE register should stop the counter, according to @@ -132,8 +132,6 @@ void clockevent_i8253_disable(void) outb_p(0, PIT_CH0); outb_p(0x30, PIT_MODE); - - raw_spin_unlock(&i8253_lock); } static int pit_shutdown(struct clock_event_device *evt) diff --git a/drivers/comedi/drivers/jr3_pci.c 
b/drivers/comedi/drivers/jr3_pci.c index cdc842b32babb4..75dce1ff24193b 100644 --- a/drivers/comedi/drivers/jr3_pci.c +++ b/drivers/comedi/drivers/jr3_pci.c @@ -758,7 +758,7 @@ static void jr3_pci_detach(struct comedi_device *dev) struct jr3_pci_dev_private *devpriv = dev->private; if (devpriv) - timer_delete_sync(&devpriv->timer); + timer_shutdown_sync(&devpriv->timer); comedi_pci_detach(dev); } diff --git a/drivers/counter/interrupt-cnt.c b/drivers/counter/interrupt-cnt.c index 949598d51575a1..d83848d0fe2af5 100644 --- a/drivers/counter/interrupt-cnt.c +++ b/drivers/counter/interrupt-cnt.c @@ -3,12 +3,14 @@ * Copyright (c) 2021 Pengutronix, Oleksij Rempel */ +#include #include #include #include #include #include #include +#include #include #include @@ -19,6 +21,7 @@ struct interrupt_cnt_priv { struct gpio_desc *gpio; int irq; bool enabled; + struct mutex lock; struct counter_signal signals; struct counter_synapse synapses; struct counter_count cnts; @@ -41,6 +44,8 @@ static int interrupt_cnt_enable_read(struct counter_device *counter, { struct interrupt_cnt_priv *priv = counter_priv(counter); + guard(mutex)(&priv->lock); + *enable = priv->enabled; return 0; @@ -51,6 +56,8 @@ static int interrupt_cnt_enable_write(struct counter_device *counter, { struct interrupt_cnt_priv *priv = counter_priv(counter); + guard(mutex)(&priv->lock); + if (priv->enabled == enable) return 0; @@ -227,6 +234,8 @@ static int interrupt_cnt_probe(struct platform_device *pdev) if (ret) return ret; + mutex_init(&priv->lock); + ret = devm_counter_add(dev, counter); if (ret < 0) return dev_err_probe(dev, ret, "Failed to add counter\n"); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 4f9cb943d945c2..0d46402e30942e 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -76,7 +76,7 @@ config ARM_VEXPRESS_SPC_CPUFREQ config ARM_BRCMSTB_AVS_CPUFREQ tristate "Broadcom STB AVS CPUfreq driver" depends on (ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ) || 
COMPILE_TEST - default y + default y if ARCH_BRCMSTB && !ARM_SCMI_CPUFREQ help Some Broadcom STB SoCs use a co-processor running proprietary firmware ("AVS") to handle voltage and frequency scaling. This driver provides @@ -88,7 +88,7 @@ config ARM_HIGHBANK_CPUFREQ tristate "Calxeda Highbank-based" depends on ARCH_HIGHBANK || COMPILE_TEST depends on CPUFREQ_DT && REGULATOR && PL320_MBOX - default m + default m if ARCH_HIGHBANK help This adds the CPUFreq driver for Calxeda Highbank SoC based boards. @@ -133,7 +133,7 @@ config ARM_MEDIATEK_CPUFREQ config ARM_MEDIATEK_CPUFREQ_HW tristate "MediaTek CPUFreq HW driver" depends on ARCH_MEDIATEK || COMPILE_TEST - default m + default m if ARCH_MEDIATEK help Support for the CPUFreq HW driver. Some MediaTek chipsets have a HW engine to offload the steps @@ -181,7 +181,7 @@ config ARM_RASPBERRYPI_CPUFREQ config ARM_S3C64XX_CPUFREQ bool "Samsung S3C64XX" depends on CPU_S3C6410 || COMPILE_TEST - default y + default CPU_S3C6410 help This adds the CPUFreq driver for Samsung S3C6410 SoC. @@ -190,7 +190,7 @@ config ARM_S3C64XX_CPUFREQ config ARM_S5PV210_CPUFREQ bool "Samsung S5PV210 and S5PC110" depends on CPU_S5PV210 || COMPILE_TEST - default y + default CPU_S5PV210 help This adds the CPUFreq driver for Samsung S5PV210 and S5PC110 SoCs. @@ -214,7 +214,7 @@ config ARM_SCMI_CPUFREQ config ARM_SPEAR_CPUFREQ bool "SPEAr CPUFreq support" depends on PLAT_SPEAR || COMPILE_TEST - default y + default PLAT_SPEAR help This adds the CPUFreq driver support for SPEAr SOCs. @@ -233,7 +233,7 @@ config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra20/30 SOCs. @@ -241,7 +241,7 @@ config ARM_TEGRA124_CPUFREQ bool "Tegra124 CPUFreq support" depends on ARCH_TEGRA || COMPILE_TEST depends on CPUFREQ_DT - default y + default ARCH_TEGRA help This adds the CPUFreq driver support for Tegra124 SOCs. 
@@ -256,14 +256,14 @@ config ARM_TEGRA194_CPUFREQ tristate "Tegra194 CPUFreq support" depends on ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC || (64BIT && COMPILE_TEST) depends on TEGRA_BPMP - default y + default ARCH_TEGRA_194_SOC || ARCH_TEGRA_234_SOC help This adds CPU frequency driver support for Tegra194 SOCs. config ARM_TI_CPUFREQ bool "Texas Instruments CPUFreq support" depends on ARCH_OMAP2PLUS || ARCH_K3 || COMPILE_TEST - default y + default ARCH_OMAP2PLUS || ARCH_K3 help This driver enables valid OPPs on the running platform based on values contained within the SoC in use. Enable this in order to diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 2c5c228408bf28..918e2bebfe788f 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -9,7 +9,6 @@ config X86_INTEL_PSTATE select ACPI_PROCESSOR if ACPI select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO select CPU_FREQ_GOV_PERFORMANCE - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver provides a P state for Intel core processors. 
The driver implements an internal governor and will become @@ -39,7 +38,6 @@ config X86_AMD_PSTATE depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 - select CPU_FREQ_GOV_SCHEDUTIL if SMP help This driver adds a CPUFreq driver which utilizes a fine grain processor performance frequency control range instead of legacy diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 924314cdeebcec..76768fe213a978 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -660,7 +660,7 @@ static u64 get_max_boost_ratio(unsigned int cpu, u64 *nominal_freq) nominal_perf = perf_caps.nominal_perf; if (nominal_freq) - *nominal_freq = perf_caps.nominal_freq; + *nominal_freq = perf_caps.nominal_freq * 1000; if (!highest_perf || !nominal_perf) { pr_debug("CPU%d: highest or nominal performance missing\n", cpu); @@ -909,8 +909,19 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency) pr_warn(FW_WARN "P-state 0 is not max freq\n"); - if (acpi_cpufreq_driver.set_boost) - policy->boost_supported = true; + if (acpi_cpufreq_driver.set_boost) { + if (policy->boost_supported) { + /* + * The firmware may have altered boost state while the + * CPU was offline (for example during a suspend-resume + * cycle). 
+ */ + if (policy->boost_enabled != boost_state(cpu)) + set_boost(policy, policy->boost_enabled); + } else { + policy->boost_supported = true; + } + } return result; diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 6789eed1bb5ba0..b961f3a3b58059 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -607,13 +607,16 @@ static void amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) union perf_cached perf = READ_ONCE(cpudata->perf); perf.max_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->max); - perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->max_limit_freq, policy->max); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { perf.min_limit_perf = min(perf.nominal_perf, perf.max_limit_perf); + WRITE_ONCE(cpudata->min_limit_freq, min(cpudata->nominal_freq, cpudata->max_limit_freq)); + } else { + perf.min_limit_perf = freq_to_perf(perf, cpudata->nominal_freq, policy->min); + WRITE_ONCE(cpudata->min_limit_freq, policy->min); + } - WRITE_ONCE(cpudata->max_limit_freq, policy->max); - WRITE_ONCE(cpudata->min_limit_freq, policy->min); WRITE_ONCE(cpudata->perf, perf); } @@ -791,16 +794,6 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } -/* - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks - * due to locking, so queue the work for later. 
- */ -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) -{ - sched_set_itmt_support(); -} -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); - #define CPPC_MAX_PERF U8_MAX static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) @@ -811,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) cpudata->hw_prefcore = true; - /* - * The priorities can be set regardless of whether or not - * sched_set_itmt_support(true) has been called and it is valid to - * update them at any time after it has been called. - */ + /* Priorities must be initialized before ITMT support can be toggled on. */ sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu); - - schedule_work(&sched_prefcore_work); } static void amd_pstate_update_limits(unsigned int cpu) @@ -1193,6 +1180,9 @@ static ssize_t show_energy_performance_preference( static void amd_pstate_driver_cleanup(void) { + if (amd_pstate_prefcore) + sched_clear_itmt_support(); + cppc_state = AMD_PSTATE_DISABLE; current_pstate_driver = NULL; } @@ -1235,6 +1225,10 @@ static int amd_pstate_register_driver(int mode) return ret; } + /* Enable ITMT support once all CPUs have initialized their asym priorities. 
*/ + if (amd_pstate_prefcore) + sched_set_itmt_support(); + return 0; } diff --git a/drivers/cpufreq/apple-soc-cpufreq.c b/drivers/cpufreq/apple-soc-cpufreq.c index 4994c86feb5738..b1d29b7af23262 100644 --- a/drivers/cpufreq/apple-soc-cpufreq.c +++ b/drivers/cpufreq/apple-soc-cpufreq.c @@ -134,11 +134,17 @@ static const struct of_device_id apple_soc_cpufreq_of_match[] __maybe_unused = { static unsigned int apple_soc_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct apple_cpu_priv *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct apple_cpu_priv *priv; struct cpufreq_frequency_table *p; unsigned int pstate; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + if (priv->info->cur_pstate_mask) { u32 reg = readl_relaxed(priv->reg_base + APPLE_DVFS_STATUS); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index b3d74f9adcf0bc..cb93f00bafdbaf 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -747,7 +747,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) int ret; if (!policy) - return -ENODEV; + return 0; cpu_data = policy->driver_data; diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 2aa00769cf09da..a010da0f6337f8 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -175,6 +175,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sm8350", }, { .compatible = "qcom,sm8450", }, { .compatible = "qcom,sm8550", }, + { .compatible = "qcom,sm8650", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 3841c9da6cac36..f45ded62b0e082 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -536,16 +536,18 @@ void cpufreq_disable_fast_switch(struct 
cpufreq_policy *policy) EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); static unsigned int __resolve_freq(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) + unsigned int target_freq, + unsigned int min, unsigned int max, + unsigned int relation) { unsigned int idx; - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (!policy->freq_table) return target_freq; - idx = cpufreq_frequency_table_target(policy, target_freq, relation); + idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation); policy->cached_resolved_idx = idx; policy->cached_target_freq = target_freq; return policy->freq_table[idx].frequency; @@ -565,7 +567,21 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE); + unsigned int min = READ_ONCE(policy->min); + unsigned int max = READ_ONCE(policy->max); + + /* + * If this function runs in parallel with cpufreq_set_policy(), it may + * read policy->min before the update and policy->max after the update + * or the other way around, so there is no ordering guarantee. + * + * Resolve this by always honoring the max (in case it comes from + * thermal throttling or similar). 
+ */ + if (unlikely(min > max)) + min = max; + + return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE); } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); @@ -2384,7 +2400,8 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (cpufreq_disabled()) return -ENODEV; - target_freq = __resolve_freq(policy, target_freq, relation); + target_freq = __resolve_freq(policy, target_freq, policy->min, + policy->max, relation); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -2708,11 +2725,18 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, * Resolve policy min/max to available frequencies. It ensures * no frequency resolution will neither overshoot the requested maximum * nor undershoot the requested minimum. + * + * Avoid storing intermediate values in policy->max or policy->min and + * compiler optimizations around them because they may be accessed + * concurrently by cpufreq_driver_resolve_freq() during the update. */ - policy->min = new_data.min; - policy->max = new_data.max; - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L); - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H); + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, + new_data.min, new_data.max, + CPUFREQ_RELATION_H)); + new_data.min = __resolve_freq(policy, new_data.min, new_data.min, + new_data.max, CPUFREQ_RELATION_L); + WRITE_ONCE(policy->min, new_data.min > policy->max ? 
policy->max : new_data.min); + trace_cpu_frequency_limits(policy); cpufreq_update_pressure(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index a7c38b8b3e7890..0e65d37c923113 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -76,7 +76,8 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, return freq_next; } - index = cpufreq_frequency_table_target(policy, freq_next, relation); + index = cpufreq_frequency_table_target(policy, freq_next, policy->min, + policy->max, relation); freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index c03a91502f8480..35de513af6c94e 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -115,8 +115,8 @@ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -147,7 +147,7 @@ int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, cpufreq_for_each_valid_entry_idx(pos, table, i) { freq = pos->frequency; - if ((freq < policy->min) || (freq > policy->max)) + if (freq < min || freq > max) continue; if (freq == target_freq) { optimal.driver_data = i; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 4aad79d26c64f5..b8ceac594acd77 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -598,6 +598,9 @@ static bool turbo_is_disabled(void) { u64 misc_en; + if (!cpu_feature_enabled(X86_FEATURE_IDA)) + return true; + 
rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); @@ -2209,7 +2212,7 @@ static int knl_get_turbo_pstate(int cpu) static int hwp_get_cpu_scaling(int cpu) { if (hybrid_scaling_factor) { - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + struct cpuinfo_x86 *c = &cpu_data(cpu); u8 cpu_type = c->topo.intel_type; /* @@ -3825,6 +3828,8 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; + else if (!strcmp(str, "enable")) + no_load = 0; else if (!strcmp(str, "active")) default_driver = &intel_pstate; else if (!strcmp(str, "passive")) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c310aeebc8f36b..944e899eb1be13 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -37,11 +37,17 @@ static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scmi_data *priv = policy->driver_data; + struct cpufreq_policy *policy; + struct scmi_data *priv; unsigned long rate; int ret; + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + ret = perf_ops->freq_get(ph, priv->domain_id, &rate, false); if (ret) return 0; diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 17cda84f00dfb9..dcbb0ae7dd476c 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -29,9 +29,16 @@ static struct scpi_ops *scpi_ops; static unsigned int scpi_cpufreq_get_rate(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu); - struct scpi_data *priv = policy->driver_data; - unsigned long rate = clk_get_rate(priv->clk); + struct cpufreq_policy *policy; + struct scpi_data *priv; + unsigned long rate; + + policy = cpufreq_cpu_get_raw(cpu); + if (unlikely(!policy)) + return 0; + + priv = policy->driver_data; + rate 
= clk_get_rate(priv->clk); return rate / 1000; } diff --git a/drivers/cpufreq/sun50i-cpufreq-nvmem.c b/drivers/cpufreq/sun50i-cpufreq-nvmem.c index 47d6840b348994..744312a44279cb 100644 --- a/drivers/cpufreq/sun50i-cpufreq-nvmem.c +++ b/drivers/cpufreq/sun50i-cpufreq-nvmem.c @@ -194,7 +194,9 @@ static int sun50i_cpufreq_get_efuse(void) struct nvmem_cell *speedbin_nvmem; const struct of_device_id *match; struct device *cpu_dev; - u32 *speedbin; + void *speedbin_ptr; + u32 speedbin = 0; + size_t len; int ret; cpu_dev = get_cpu_device(0); @@ -217,14 +219,18 @@ static int sun50i_cpufreq_get_efuse(void) return dev_err_probe(cpu_dev, PTR_ERR(speedbin_nvmem), "Could not get nvmem cell\n"); - speedbin = nvmem_cell_read(speedbin_nvmem, NULL); + speedbin_ptr = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); - if (IS_ERR(speedbin)) - return PTR_ERR(speedbin); + if (IS_ERR(speedbin_ptr)) + return PTR_ERR(speedbin_ptr); - ret = opp_data->efuse_xlate(*speedbin); + if (len <= 4) + memcpy(&speedbin, speedbin_ptr, len); + speedbin = le32_to_cpu(speedbin); - kfree(speedbin); + ret = opp_data->efuse_xlate(speedbin); + + kfree(speedbin_ptr); return ret; }; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 19b7fb4a93e86c..05f67661553c9a 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -275,13 +275,16 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req } else { if (nr_sgs > 0) dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); + + if (nr_sgd > 0) + dma_unmap_sg(ce->dev, areq->dst, nd, DMA_FROM_DEVICE); } theend_iv: if (areq->iv && ivsize > 0) { - if (rctx->addr_iv) + if (!dma_mapping_error(ce->dev, rctx->addr_iv)) dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); + offset = areq->cryptlen - ivsize; if 
(rctx->op_dir & CE_DECRYPTION) { memcpy(areq->iv, chan->backup_iv, ivsize); diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index ec1ffda9ea32e0..658f520cee0caa 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -832,13 +832,12 @@ static int sun8i_ce_pm_init(struct sun8i_ce_dev *ce) err = pm_runtime_set_suspended(ce->dev); if (err) return err; - pm_runtime_enable(ce->dev); - return err; -} -static void sun8i_ce_pm_exit(struct sun8i_ce_dev *ce) -{ - pm_runtime_disable(ce->dev); + err = devm_pm_runtime_enable(ce->dev); + if (err) + return err; + + return 0; } static int sun8i_ce_get_clks(struct sun8i_ce_dev *ce) @@ -1041,7 +1040,7 @@ static int sun8i_ce_probe(struct platform_device *pdev) "sun8i-ce-ns", ce); if (err) { dev_err(ce->dev, "Cannot request CryptoEngine Non-secure IRQ (err=%d)\n", err); - goto error_irq; + goto error_pm; } err = sun8i_ce_register_algs(ce); @@ -1082,8 +1081,6 @@ static int sun8i_ce_probe(struct platform_device *pdev) return 0; error_alg: sun8i_ce_unregister_algs(ce); -error_irq: - sun8i_ce_pm_exit(ce); error_pm: sun8i_ce_free_chanlist(ce, MAXFLOW - 1); return err; @@ -1104,8 +1101,6 @@ static void sun8i_ce_remove(struct platform_device *pdev) #endif sun8i_ce_free_chanlist(ce, MAXFLOW - 1); - - sun8i_ce_pm_exit(ce); } static const struct of_device_id sun8i_ce_crypto_of_match_table[] = { diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index 6072dd9f390b40..3f9d79ea01aaa6 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -343,9 +343,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) u32 common; u64 byte_count; __le32 *bf; - void *buf = NULL; + void *buf, *result; int j, i, todo; - void *result = NULL; u64 bs; int digestsize; dma_addr_t addr_res, 
addr_pad; @@ -365,14 +364,14 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA); if (!buf) { err = -ENOMEM; - goto theend; + goto err_out; } bf = (__le32 *)buf; result = kzalloc(digestsize, GFP_KERNEL | GFP_DMA); if (!result) { err = -ENOMEM; - goto theend; + goto err_free_buf; } flow = rctx->flow; @@ -398,7 +397,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (nr_sgs <= 0 || nr_sgs > MAX_SG) { dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs); err = -EINVAL; - goto theend; + goto err_free_result; } len = areq->nbytes; @@ -411,7 +410,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (len > 0) { dev_err(ce->dev, "remaining len %d\n", len); err = -EINVAL; - goto theend; + goto err_unmap_src; } addr_res = dma_map_single(ce->dev, result, digestsize, DMA_FROM_DEVICE); cet->t_dst[0].addr = desc_addr_val_le32(ce, addr_res); @@ -419,7 +418,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ce->dev, addr_res)) { dev_err(ce->dev, "DMA map dest\n"); err = -EINVAL; - goto theend; + goto err_unmap_src; } byte_count = areq->nbytes; @@ -441,7 +440,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) } if (!j) { err = -EINVAL; - goto theend; + goto err_unmap_result; } addr_pad = dma_map_single(ce->dev, buf, j * 4, DMA_TO_DEVICE); @@ -450,7 +449,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) if (dma_mapping_error(ce->dev, addr_pad)) { dev_err(ce->dev, "DMA error on padding SG\n"); err = -EINVAL; - goto theend; + goto err_unmap_result; } if (ce->variant->hash_t_dlen_in_bits) @@ -463,16 +462,25 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) err = sun8i_ce_run_task(ce, flow, crypto_ahash_alg_name(tfm)); dma_unmap_single(ce->dev, addr_pad, j * 4, DMA_TO_DEVICE); - dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); + +err_unmap_result: dma_unmap_single(ce->dev, addr_res, digestsize, 
DMA_FROM_DEVICE); + if (!err) + memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize); +err_unmap_src: + dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); - memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize); -theend: - kfree(buf); +err_free_result: kfree(result); + +err_free_buf: + kfree(buf); + +err_out: local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); + return 0; } diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 3b5c2af013d0da..83df4d71905318 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -308,8 +308,8 @@ struct sun8i_ce_hash_tfm_ctx { * @flow: the flow to use for this request */ struct sun8i_ce_hash_reqctx { - struct ahash_request fallback_req; int flow; + struct ahash_request fallback_req; // keep at the end }; /* diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 9b9605ce8ee629..8831bcb230c2d4 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -141,7 +141,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) /* we need to copy all IVs from source in case DMA is bi-directionnal */ while (sg && len) { - if (sg_dma_len(sg) == 0) { + if (sg->length == 0) { sg = sg_next(sg); continue; } diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index 75bebec2c757bf..0fcf4a39de279d 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -163,6 +163,12 @@ static int atmel_sha204a_probe(struct i2c_client *client) i2c_priv->hwrng.name = dev_name(&client->dev); i2c_priv->hwrng.read = atmel_sha204a_rng_read; + /* + * According to review by Bill Cox [1], this HWRNG has very low entropy. 
+ * [1] https://www.metzdowd.com/pipermail/cryptography/2014-December/023858.html + */ + i2c_priv->hwrng.quality = 1; + ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng); if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 7701d00bcb3ac5..b6e7c0b29d4e6c 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -122,12 +122,12 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req) qm_fd_addr_set64(&fd, addr); do { + refcount_inc(&req->drv_ctx->refcnt); ret = qman_enqueue(req->drv_ctx->req_fq, &fd); - if (likely(!ret)) { - refcount_inc(&req->drv_ctx->refcnt); + if (likely(!ret)) return 0; - } + refcount_dec(&req->drv_ctx->refcnt); if (ret != -EBUSY) break; num_retries++; diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 09d9589f2d681d..33a285981dfd45 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1187,8 +1187,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: compression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1425,8 +1424,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: decompression mode %s," " desc->src1_addr %llx, desc->src1_size %d," diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index cf62db50f95858..48c5c8ea8c43ec 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -459,6 +459,9 @@ static int 
mv_cesa_skcipher_queue_req(struct skcipher_request *req, struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); struct mv_cesa_engine *engine; + if (!req->cryptlen) + return 0; + ret = mv_cesa_skcipher_req_init(req, tmpl); if (ret) return ret; diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index f150861ceaf695..6815eddc906812 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -663,7 +663,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (ret) goto err_free_tdma; - if (iter.src.sg) { + if (iter.base.len > iter.src.op_offset) { /* * Add all the new data, inserting an operation block and * launch command between each full SRAM block-worth of diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c index ca9d0cca1f748e..0e07d0523291a5 100644 --- a/drivers/crypto/tegra/tegra-se-aes.c +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -269,7 +269,7 @@ static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq) unsigned int cmdlen, key1_id, key2_id; int ret; - rctx->iv = (u32 *)req->iv; + rctx->iv = (ctx->alg == SE_ALG_ECB) ? NULL : (u32 *)req->iv; rctx->len = req->cryptlen; key1_id = ctx->key1_id; key2_id = ctx->key2_id; @@ -498,9 +498,6 @@ static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt) if (!req->cryptlen) return 0; - if (ctx->alg == SE_ALG_ECB) - req->iv = NULL; - rctx->encrypt = encrypt; return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req); diff --git a/drivers/crypto/xilinx/zynqmp-sha.c b/drivers/crypto/xilinx/zynqmp-sha.c index 580649f9bff81f..0edf8eb264b55f 100644 --- a/drivers/crypto/xilinx/zynqmp-sha.c +++ b/drivers/crypto/xilinx/zynqmp-sha.c @@ -3,18 +3,19 @@ * Xilinx ZynqMP SHA Driver. * Copyright (c) 2022 Xilinx Inc. 
*/ -#include #include #include #include -#include +#include +#include #include #include +#include #include -#include #include #include #include +#include #include #define ZYNQMP_DMA_BIT_MASK 32U @@ -43,6 +44,8 @@ struct zynqmp_sha_desc_ctx { static dma_addr_t update_dma_addr, final_dma_addr; static char *ubuf, *fbuf; +static DEFINE_SPINLOCK(zynqmp_sha_lock); + static int zynqmp_sha_init_tfm(struct crypto_shash *hash) { const char *fallback_driver_name = crypto_shash_alg_name(hash); @@ -124,7 +127,8 @@ static int zynqmp_sha_export(struct shash_desc *desc, void *out) return crypto_shash_export(&dctx->fbk_req, out); } -static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +static int __zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { unsigned int remaining_len = len; int update_size; @@ -159,6 +163,12 @@ static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned i return ret; } +static int zynqmp_sha_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + scoped_guard(spinlock_bh, &zynqmp_sha_lock) + return __zynqmp_sha_digest(desc, data, len, out); +} + static struct zynqmp_sha_drv_ctx sha3_drv_ctx = { .sha3_384 = { .init = zynqmp_sha_init, diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 15699299dc11d4..17b692eb325713 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -119,7 +119,7 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port); +int cxl_gpf_port_setup(struct cxl_dport *dport); int cxl_acpi_get_extended_linear_cache_size(struct resource *backing_res, int nid, resource_size_t *size); diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index f4daefe3180e52..1498e2369c3702 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c 
@@ -528,13 +528,13 @@ static void *cxlctl_set_feature(struct cxl_features_state *cxlfs, rc = cxl_set_feature(cxl_mbox, &feat_in->uuid, feat_in->version, feat_in->feat_data, data_size, flags, offset, &return_code); + *out_len = sizeof(*rpc_out); if (rc) { rpc_out->retval = return_code; return no_free_ptr(rpc_out); } rpc_out->retval = CXL_MBOX_CMD_RC_SUCCESS; - *out_len = sizeof(*rpc_out); return no_free_ptr(rpc_out); } @@ -677,7 +677,7 @@ static void free_memdev_fwctl(void *_fwctl_dev) fwctl_put(fwctl_dev); } -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_features_state *cxlfs; @@ -700,7 +700,7 @@ int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) if (rc) return rc; - return devm_add_action_or_reset(&cxlmd->dev, free_memdev_fwctl, + return devm_add_action_or_reset(host, free_memdev_fwctl, no_free_ptr(fwctl_dev)); } EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fwctl, "CXL"); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 96fecb799cbcf7..3b80e9a76ba862 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1072,14 +1072,20 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) #define GPF_TIMEOUT_BASE_MAX 2 #define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */ -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port) +u16 cxl_gpf_get_dvsec(struct device *dev) { + struct pci_dev *pdev; + bool is_port = true; u16 dvsec; if (!dev_is_pci(dev)) return 0; - dvsec = pci_find_dvsec_capability(to_pci_dev(dev), PCI_VENDOR_ID_CXL, + pdev = to_pci_dev(dev); + if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) + is_port = false; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, is_port ? 
CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", @@ -1128,26 +1134,24 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) return rc; } -int cxl_gpf_port_setup(struct device *dport_dev, struct cxl_port *port) +int cxl_gpf_port_setup(struct cxl_dport *dport) { - struct pci_dev *pdev; - - if (!port) + if (!dport) return -EINVAL; - if (!port->gpf_dvsec) { + if (!dport->gpf_dvsec) { + struct pci_dev *pdev; int dvsec; - dvsec = cxl_gpf_get_dvsec(dport_dev, true); + dvsec = cxl_gpf_get_dvsec(dport->dport_dev); if (!dvsec) return -EINVAL; - port->gpf_dvsec = dvsec; + dport->gpf_dvsec = dvsec; + pdev = to_pci_dev(dport->dport_dev); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1); + update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2); } - pdev = to_pci_dev(dport_dev); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 1); - update_gpf_port_dvsec(pdev, port->gpf_dvsec, 2); - return 0; } diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0fd6646c1a2e8e..726bd4a7de2703 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1678,7 +1678,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) if (rc && rc != -EBUSY) return rc; - cxl_gpf_port_setup(dport_dev, port); + cxl_gpf_port_setup(dport); /* Any more ports to add between this one and the root? 
*/ if (!dev_is_cxl_root_child(&port->dev)) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 117c2e94c761d9..5ca7b0eed568b3 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -581,7 +581,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri resource_size_t rcrb = ri->base; void __iomem *addr; u32 bar0, bar1; - u16 cmd; u32 id; if (which == CXL_RCRB_UPSTREAM) @@ -603,7 +602,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri } id = readl(addr + PCI_VENDOR_ID); - cmd = readw(addr + PCI_COMMAND); bar0 = readl(addr + PCI_BASE_ADDRESS_0); bar1 = readl(addr + PCI_BASE_ADDRESS_1); iounmap(addr); @@ -618,8 +616,6 @@ resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri dev_err(dev, "Failed to access Downstream Port RCRB\n"); return CXL_RESOURCE_NONE; } - if (!(cmd & PCI_COMMAND_MEMORY)) - return CXL_RESOURCE_NONE; /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) return CXL_RESOURCE_NONE; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index be8a7dc77719bd..a9ab46eb061001 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -592,7 +592,6 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds - * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_port { struct device dev; @@ -616,7 +615,6 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; - int gpf_dvsec; }; /** @@ -664,6 +662,7 @@ struct cxl_rcrb_info { * @regs: Dport parsed register blocks * @coord: access coordinates (bandwidth and latency performance attributes) * @link_latency: calculated PCIe downstream latency + * @gpf_dvsec: Cached GPF port DVSEC */ struct cxl_dport { struct device *dport_dev; @@ -675,6 +674,7 @@ struct cxl_dport { struct cxl_regs regs; struct 
access_coordinate coord[ACCESS_COORDINATE_MAX]; long link_latency; + int gpf_dvsec; }; /** @@ -910,6 +910,6 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #define __mock static #endif -u16 cxl_gpf_get_dvsec(struct device *dev, bool is_port); +u16 cxl_gpf_get_dvsec(struct device *dev); #endif /* __CXL_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 7b14a154463c59..785aa2af5eaac4 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1018,7 +1018,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(&pdev->dev, "No CXL FWCTL setup\n"); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index d061fe3d2b8662..e197883690efc1 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -108,7 +108,7 @@ static void cxl_nvdimm_arm_dirty_shutdown_tracking(struct cxl_nvdimm *cxl_nvd) return; } - if (!cxl_gpf_get_dvsec(cxlds->dev, false)) + if (!cxl_gpf_get_dvsec(cxlds->dev)) return; if (cxl_get_dirty_count(mds, &count)) { diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5f8d010516f07f..b1ef4546346d44 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, count++; dma_resv_list_set(fobj, i, fence, usage); - /* pointer update must be visible before we extend the num_fences */ - smp_store_mb(fobj->num_fences, count); + /* fence update must be visible before we extend the num_fences */ + smp_wmb(); + fobj->num_fences = count; } EXPORT_SYMBOL(dma_resv_add_fence); diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index f5905d67dedbbb..22a808995f106f 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -438,15 +438,17 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return -EINVAL; pt = 
dma_fence_to_sync_pt(fence); - if (!pt) - return -EINVAL; + if (!pt) { + ret = -EINVAL; + goto put_fence; + } spin_lock_irqsave(fence->lock, flags); - if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { - data.deadline_ns = ktime_to_ns(pt->deadline); - } else { + if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) { ret = -ENOENT; + goto unlock; } + data.deadline_ns = ktime_to_ns(pt->deadline); spin_unlock_irqrestore(fence->lock, flags); dma_fence_put(fence); @@ -458,6 +460,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a return -EFAULT; return 0; + +unlock: + spin_unlock_irqrestore(fence->lock, flags); +put_fence: + dma_fence_put(fence); + + return ret; } static long sw_sync_ioctl(struct file *file, unsigned int cmd, diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index cc7398cc17d67f..e74e36a8ecda21 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -393,7 +393,7 @@ static long udmabuf_create(struct miscdevice *device, if (!ubuf) return -ENOMEM; - pglimit = (size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; + pglimit = ((u64)size_limit_mb * 1024 * 1024) >> PAGE_SHIFT; for (i = 0; i < head->count; i++) { pgoff_t subpgcnt; diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c index 715ac3ae067b85..81339664036f33 100644 --- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c @@ -342,6 +342,9 @@ static void pt_cmd_callback_work(void *data, int err) struct pt_dma_chan *chan; unsigned long flags; + if (!desc) + return; + dma_chan = desc->vd.tx.chan; chan = to_pt_chan(dma_chan); @@ -355,16 +358,14 @@ static void pt_cmd_callback_work(void *data, int err) desc->status = DMA_ERROR; spin_lock_irqsave(&chan->vc.lock, flags); - if (desc) { - if (desc->status != DMA_COMPLETE) { - if (desc->status != DMA_ERROR) - desc->status = DMA_COMPLETE; + if (desc->status != DMA_COMPLETE) { + if (desc->status != DMA_ERROR) + 
desc->status = DMA_COMPLETE; - dma_cookie_complete(tx_desc); - dma_descriptor_unmap(tx_desc); - } else { - tx_desc = NULL; - } + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } else { + tx_desc = NULL; } spin_unlock_irqrestore(&chan->vc.lock, flags); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d891dfca358e20..91b2fbc0b86471 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -841,9 +841,9 @@ static int dmatest_func(void *data) } else { dma_async_issue_pending(chan); - wait_event_timeout(thread->done_wait, - done->done, - msecs_to_jiffies(params->timeout)); + wait_event_freezable_timeout(thread->done_wait, + done->done, + msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 756d67325db526..66bfa28d984e1b 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -57,7 +57,7 @@ static irqreturn_t fsl_edma3_tx_handler(int irq, void *dev_id) intr = edma_readl_chreg(fsl_chan, ch_int); if (!intr) - return IRQ_HANDLED; + return IRQ_NONE; edma_writel_chreg(fsl_chan, 1, ch_int); diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index ff94ee892339d5..6d12033649f817 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -222,7 +222,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_wq *wq; struct device *dev, *fdev; int rc = 0; - struct iommu_sva *sva; + struct iommu_sva *sva = NULL; unsigned int pasid; struct idxd_cdev *idxd_cdev; @@ -317,7 +317,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) if (device_user_pasid_enabled(idxd)) idxd_xa_pasid_remove(ctx); failed_get_pasid: - if (device_user_pasid_enabled(idxd)) + if (device_user_pasid_enabled(idxd) && !IS_ERR_OR_NULL(sva)) iommu_sva_unbind_device(sva); failed: mutex_unlock(&wq->wq_lock); @@ -407,6 +407,9 @@ static int idxd_cdev_mmap(struct file *filp, 
struct vm_area_struct *vma) if (!idxd->user_submission_safe && !capable(CAP_SYS_RAWIO)) return -EPERM; + if (current->mm != ctx->mm) + return -EPERM; + rc = check_vma(wq, vma, __func__); if (rc < 0) return rc; @@ -473,6 +476,9 @@ static ssize_t idxd_cdev_write(struct file *filp, const char __user *buf, size_t ssize_t written = 0; int i; + if (current->mm != ctx->mm) + return -EPERM; + for (i = 0; i < len/sizeof(struct dsa_hw_desc); i++) { int rc = idxd_submit_user_descriptor(ctx, udesc + i); @@ -493,6 +499,9 @@ static __poll_t idxd_cdev_poll(struct file *filp, struct idxd_device *idxd = wq->idxd; __poll_t out = 0; + if (current->mm != ctx->mm) + return POLLNVAL; + poll_wait(filp, &wq->err_queue, wait); spin_lock(&idxd->dev_lock); if (idxd->sw_err.valid) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index fca1d29249998d..760b7d81fcd846 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -155,6 +155,25 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd) pci_free_irq_vectors(pdev); } +static void idxd_clean_wqs(struct idxd_device *idxd) +{ + struct idxd_wq *wq; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_wqs; i++) { + wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); + conf_dev = wq_confdev(wq); + put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + kfree(idxd->wqs); +} + static int idxd_setup_wqs(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -169,8 +188,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev)); if (!idxd->wq_enable_map) { - kfree(idxd->wqs); - return -ENOMEM; + rc = -ENOMEM; + goto err_bitmap; } for (i = 0; i < idxd->max_wqs; i++) { @@ -189,10 +208,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) conf_dev->bus = &dsa_bus_type; conf_dev->type = &idxd_wq_device_type; rc = 
dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id); - if (rc < 0) { - put_device(conf_dev); + if (rc < 0) goto err; - } mutex_init(&wq->wq_lock); init_waitqueue_head(&wq->err_queue); @@ -203,7 +220,6 @@ static int idxd_setup_wqs(struct idxd_device *idxd) wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES; wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev)); if (!wq->wqcfg) { - put_device(conf_dev); rc = -ENOMEM; goto err; } @@ -211,9 +227,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) if (idxd->hw.wq_cap.op_config) { wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL); if (!wq->opcap_bmap) { - put_device(conf_dev); rc = -ENOMEM; - goto err; + goto err_opcap_bmap; } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } @@ -224,15 +239,46 @@ static int idxd_setup_wqs(struct idxd_device *idxd) return 0; - err: +err_opcap_bmap: + kfree(wq->wqcfg); + +err: + put_device(conf_dev); + kfree(wq); + while (--i >= 0) { wq = idxd->wqs[i]; + if (idxd->hw.wq_cap.op_config) + bitmap_free(wq->opcap_bmap); + kfree(wq->wqcfg); conf_dev = wq_confdev(wq); put_device(conf_dev); + kfree(wq); + } + bitmap_free(idxd->wq_enable_map); + +err_bitmap: + kfree(idxd->wqs); + return rc; } +static void idxd_clean_engines(struct idxd_device *idxd) +{ + struct idxd_engine *engine; + struct device *conf_dev; + int i; + + for (i = 0; i < idxd->max_engines; i++) { + engine = idxd->engines[i]; + conf_dev = engine_confdev(engine); + put_device(conf_dev); + kfree(engine); + } + kfree(idxd->engines); +} + static int idxd_setup_engines(struct idxd_device *idxd) { struct idxd_engine *engine; @@ -263,6 +309,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { put_device(conf_dev); + kfree(engine); goto err; } @@ -276,10 +323,26 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine = idxd->engines[i]; conf_dev = engine_confdev(engine); put_device(conf_dev); + 
kfree(engine); } + kfree(idxd->engines); + return rc; } +static void idxd_clean_groups(struct idxd_device *idxd) +{ + struct idxd_group *group; + int i; + + for (i = 0; i < idxd->max_groups; i++) { + group = idxd->groups[i]; + put_device(group_confdev(group)); + kfree(group); + } + kfree(idxd->groups); +} + static int idxd_setup_groups(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -310,6 +373,7 @@ static int idxd_setup_groups(struct idxd_device *idxd) rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id); if (rc < 0) { put_device(conf_dev); + kfree(group); goto err; } @@ -334,20 +398,18 @@ static int idxd_setup_groups(struct idxd_device *idxd) while (--i >= 0) { group = idxd->groups[i]; put_device(group_confdev(group)); + kfree(group); } + kfree(idxd->groups); + return rc; } static void idxd_cleanup_internals(struct idxd_device *idxd) { - int i; - - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_groups(idxd); + idxd_clean_engines(idxd); + idxd_clean_wqs(idxd); destroy_workqueue(idxd->wq); } @@ -390,7 +452,7 @@ static int idxd_init_evl(struct idxd_device *idxd) static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; - int rc, i; + int rc; init_waitqueue_head(&idxd->cmd_waitq); @@ -421,14 +483,11 @@ static int idxd_setup_internals(struct idxd_device *idxd) err_evl: destroy_workqueue(idxd->wq); err_wkq_create: - for (i = 0; i < idxd->max_groups; i++) - put_device(group_confdev(idxd->groups[i])); + idxd_clean_groups(idxd); err_group: - for (i = 0; i < idxd->max_engines; i++) - put_device(engine_confdev(idxd->engines[i])); + idxd_clean_engines(idxd); err_engine: - for (i = 0; i < idxd->max_wqs; i++) - put_device(wq_confdev(idxd->wqs[i])); + idxd_clean_wqs(idxd); err_wqs: return 
rc; } @@ -528,6 +587,17 @@ static void idxd_read_caps(struct idxd_device *idxd) idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } +static void idxd_free(struct idxd_device *idxd) +{ + if (!idxd) + return; + + put_device(idxd_confdev(idxd)); + bitmap_free(idxd->opcap_bmap); + ida_free(&idxd_ida, idxd->id); + kfree(idxd); +} + static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) { struct device *dev = &pdev->dev; @@ -545,28 +615,34 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type); idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL); if (idxd->id < 0) - return NULL; + goto err_ida; idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev)); - if (!idxd->opcap_bmap) { - ida_free(&idxd_ida, idxd->id); - return NULL; - } + if (!idxd->opcap_bmap) + goto err_opcap; device_initialize(conf_dev); conf_dev->parent = dev; conf_dev->bus = &dsa_bus_type; conf_dev->type = idxd->data->dev_type; rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id); - if (rc < 0) { - put_device(conf_dev); - return NULL; - } + if (rc < 0) + goto err_name; spin_lock_init(&idxd->dev_lock); spin_lock_init(&idxd->cmd_lock); return idxd; + +err_name: + put_device(conf_dev); + bitmap_free(idxd->opcap_bmap); +err_opcap: + ida_free(&idxd_ida, idxd->id); +err_ida: + kfree(idxd); + + return NULL; } static int idxd_enable_system_pasid(struct idxd_device *idxd) @@ -1190,7 +1266,7 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev, err: pci_iounmap(pdev, idxd->reg_base); err_iomap: - put_device(idxd_confdev(idxd)); + idxd_free(idxd); err_idxd_alloc: pci_disable_device(pdev); return rc; @@ -1232,7 +1308,6 @@ static void idxd_shutdown(struct pci_dev *pdev) static void idxd_remove(struct pci_dev *pdev) { struct idxd_device *idxd = pci_get_drvdata(pdev); - struct idxd_irq_entry *irq_entry; 
idxd_unregister_devices(idxd); /* @@ -1245,20 +1320,12 @@ static void idxd_remove(struct pci_dev *pdev) get_device(idxd_confdev(idxd)); device_unregister(idxd_confdev(idxd)); idxd_shutdown(pdev); - if (device_pasid_enabled(idxd)) - idxd_disable_system_pasid(idxd); idxd_device_remove_debugfs(idxd); - - irq_entry = idxd_get_ie(idxd, 0); - free_irq(irq_entry->vector, irq_entry); - pci_free_irq_vectors(pdev); + idxd_cleanup(idxd); pci_iounmap(pdev, idxd->reg_base); - if (device_user_pasid_enabled(idxd)) - idxd_disable_sva(pdev); - pci_disable_device(pdev); - destroy_workqueue(idxd->wq); - perfmon_pmu_remove(idxd); put_device(idxd_confdev(idxd)); + idxd_free(idxd); + pci_disable_device(pdev); } static struct pci_driver idxd_pci_driver = { diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c index d5ddb4e30e7150..47c8adfdc15504 100644 --- a/drivers/dma/mediatek/mtk-cqdma.c +++ b/drivers/dma/mediatek/mtk-cqdma.c @@ -420,15 +420,11 @@ static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c, { struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c); struct virt_dma_desc *vd; - unsigned long flags; - spin_lock_irqsave(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->pc->queue, node) if (vd->tx.cookie == cookie) { - spin_unlock_irqrestore(&cvc->pc->lock, flags); return vd; } - spin_unlock_irqrestore(&cvc->pc->lock, flags); list_for_each_entry(vd, &cvc->vc.desc_issued, node) if (vd->tx.cookie == cookie) @@ -452,9 +448,11 @@ static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c, if (ret == DMA_COMPLETE || !txstate) return ret; + spin_lock_irqsave(&cvc->pc->lock, flags); spin_lock_irqsave(&cvc->vc.lock, flags); vd = mtk_cqdma_find_active_desc(c, cookie); spin_unlock_irqrestore(&cvc->vc.lock, flags); + spin_unlock_irqrestore(&cvc->pc->lock, flags); if (vd) { cvd = to_cqdma_vdesc(vd); diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index b223a7aacb0cf1..aa2dc762140f6e 100644 --- a/drivers/dma/ti/k3-udma.c +++ 
b/drivers/dma/ti/k3-udma.c @@ -1091,8 +1091,11 @@ static void udma_check_tx_completion(struct work_struct *work) u32 residue_diff; ktime_t time_diff; unsigned long delay; + unsigned long flags; while (1) { + spin_lock_irqsave(&uc->vc.lock, flags); + if (uc->desc) { /* Get previous residue and time stamp */ residue_diff = uc->tx_drain.residue; @@ -1127,6 +1130,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + spin_unlock_irqrestore(&uc->vc.lock, flags); + usleep_range(ktime_to_us(delay), ktime_to_us(delay) + 10); continue; @@ -1143,6 +1148,8 @@ static void udma_check_tx_completion(struct work_struct *work) break; } + + spin_unlock_irqrestore(&uc->vc.lock, flags); } static irqreturn_t udma_ring_irq_handler(int irq, void *data) @@ -4246,7 +4253,6 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { struct udma_dev *ud = ofdma->of_dma_data; - dma_cap_mask_t mask = ud->ddev.cap_mask; struct udma_filter_param filter_param; struct dma_chan *chan; @@ -4278,7 +4284,7 @@ static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec, } } - chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param, + chan = __dma_request_channel(&ud->ddev.cap_mask, udma_dma_filter_fn, &filter_param, ofdma->of_node); if (!chan) { dev_err(ud->dev, "get channel fail in %s.\n", __func__); @@ -5618,7 +5624,8 @@ static int udma_probe(struct platform_device *pdev) uc->config.dir = DMA_MEM_TO_MEM; uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d", dev_name(dev), i); - + if (!uc->name) + return -ENOMEM; vchan_init(&uc->vc, &ud->ddev); /* Use custom vchan completion handling */ tasklet_setup(&uc->vc.task, udma_vchan_complete); diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3e971f90236330..dcd7008fe06b05 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -99,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) if (status & 
priv->ecc_stat_ce_mask) { regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset, &err_addr); - if (priv->ecc_uecnt_offset) + if (priv->ecc_cecnt_offset) regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset, &err_count); edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count, @@ -1005,9 +1005,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } } - /* Interrupt mode set to every SBERR */ - regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST, - ALTR_A10_ECC_INTMODE); /* Enable ECC */ ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base + ALTR_A10_ECC_CTRL_OFST)); @@ -2127,6 +2124,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return PTR_ERR(edac->ecc_mgr_map); } + /* Set irq mask for DDR SBE to avoid any pending irq before registration */ + regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST, + (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0)); + edac->irq_chip.name = pdev->dev.of_node->name; edac->irq_chip.irq_mask = a10_eccmgr_irq_mask; edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask; diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 3727e72c8c2e70..7248d24c4908d7 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -249,6 +249,8 @@ struct altr_sdram_mc_data { #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 #define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 #define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1) +#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16) +#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17) #define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C #define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c index 4942a240c30f25..ae3bb7afa103eb 100644 --- a/drivers/edac/bluefield_edac.c +++ b/drivers/edac/bluefield_edac.c @@ -199,8 +199,10 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, * error without the detailed information. 
*/ err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); - if (err) + if (err) { dev_err(priv->dev, "DRAM syndrom read failed.\n"); + return; + } serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); @@ -213,20 +215,26 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, } err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); - if (err) + if (err) { dev_err(priv->dev, "DRAM additional info read failed.\n"); + return; + } err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); - if (err) + if (err) { dev_err(priv->dev, "Error addr 0 read failed.\n"); + return; + } err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); - if (err) + if (err) { dev_err(priv->dev, "Error addr 1 read failed.\n"); + return; + } ecc_dimm_addr = ((u64)edea1 << 32) | edea0; @@ -250,8 +258,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci) return; err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); - if (err) + if (err) { dev_err(priv->dev, "ECC count read failed.\n"); + return; + } single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 355a977019e944..355b527d839e78 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -95,7 +95,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, +static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 
*rrl_ctl, u32 *offsets_scrub, u32 *offsets_demand, u32 *offsets_demand2) { @@ -108,10 +108,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable if (enable) { /* Save default configurations */ - imc->chan[chan].retry_rd_err_log_s = s; - imc->chan[chan].retry_rd_err_log_d = d; + rrl_ctl[0] = s; + rrl_ctl[1] = d; if (offsets_demand2) - imc->chan[chan].retry_rd_err_log_d2 = d2; + rrl_ctl[2] = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; s |= RETRY_RD_ERR_LOG_EN; @@ -125,25 +125,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable } } else { /* Restore default configurations */ - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC) s |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) s &= ~RETRY_RD_ERR_LOG_EN; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) d |= RETRY_RD_ERR_LOG_UC; - if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER) + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) d |= RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { - if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC) + if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) d2 |= RETRY_RD_ERR_LOG_UC; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN)) + if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) d2 &= ~RETRY_RD_ERR_LOG_EN; } } @@ -157,6 +157,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, 
bool enable static void enable_retry_rd_err_log(bool enable) { int i, j, imc_num, chan_num; + struct skx_channel *chan; struct skx_imc *imc; struct skx_dev *d; @@ -171,8 +172,9 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub, res_cfg->offsets_demand, res_cfg->offsets_demand2); @@ -186,12 +188,13 @@ static void enable_retry_rd_err_log(bool enable) if (!imc->mbase || !imc->hbm_mc) continue; + chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], res_cfg->offsets_scrub_hbm0, res_cfg->offsets_demand_hbm0, NULL); - __enable_retry_rd_err_log(imc, j, enable, + __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], res_cfg->offsets_scrub_hbm1, res_cfg->offsets_demand_hbm1, NULL); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index fa5b442b184499..c9ade45c1a99f3 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -116,6 +116,7 @@ EXPORT_SYMBOL_GPL(skx_adxl_get); void skx_adxl_put(void) { + adxl_component_count = 0; kfree(adxl_values); kfree(adxl_msg); } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index ca5408803f8787..5afd425f3b4ff1 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -79,6 +79,9 @@ */ #define MCACOD_EXT_MEM_ERR 0x280 +/* Max RRL register sets per {,sub-,pseudo-}channel. 
*/ +#define NUM_RRL_SET 3 + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two @@ -117,9 +120,11 @@ struct skx_dev { struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; - u32 retry_rd_err_log_s; - u32 retry_rd_err_log_d; - u32 retry_rd_err_log_d2; + /* + * Two groups of RRL control registers per channel to save default RRL + * settings of two {sub-,pseudo-}channels in Linux RRL control mode. + */ + u32 rrl_ctl[2][NUM_RRL_SET]; struct skx_dimm { u8 close_pg; u8 bank_xor_enable; diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index aadc395ee16813..7df19d82aa689e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -31,7 +31,6 @@ config ARM_SCPI_PROTOCOL config ARM_SDE_INTERFACE bool "ARM Software Delegated Exception Interface (SDEI)" depends on ARM64 - depends on ACPI_APEI_GHES help The Software Delegated Exception Interface (SDEI) is an ARM standard for registering callbacks from the platform firmware diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 19295282de2406..fe55613a8ea993 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -299,7 +299,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, import_uuid(&buf->uuid, (u8 *)&rx_buf->uuid); } - ffa_rx_release(); + if (!(flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)) + ffa_rx_release(); mutex_unlock(&drv_info->rx_lock); diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index 7af01664ce7e2f..3a5474015f7dfd 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -255,6 +255,9 @@ static struct scmi_device *scmi_child_dev_find(struct device *parent, if (!dev) return NULL; + /* Drop the refcnt bumped implicitly by device_find_child */ + put_device(dev); + return to_scmi_dev(dev); } diff --git a/drivers/firmware/arm_scmi/driver.c 
b/drivers/firmware/arm_scmi/driver.c index 1c75a4c9c37166..0390d5ff195ec0 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1248,7 +1248,8 @@ static void xfer_put(const struct scmi_protocol_handle *ph, } static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, - struct scmi_xfer *xfer, ktime_t stop) + struct scmi_xfer *xfer, ktime_t stop, + bool *ooo) { struct scmi_info *info = handle_to_scmi_info(cinfo->handle); @@ -1257,7 +1258,7 @@ static bool scmi_xfer_done_no_timeout(struct scmi_chan_info *cinfo, * in case of out-of-order receptions of delayed responses */ return info->desc->ops->poll_done(cinfo, xfer) || - try_wait_for_completion(&xfer->done) || + (*ooo = try_wait_for_completion(&xfer->done)) || ktime_after(ktime_get(), stop); } @@ -1274,15 +1275,17 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, * itself to support synchronous commands replies. */ if (!desc->sync_cmds_completed_on_ret) { + bool ooo = false; + /* * Poll on xfer using transport provided .poll_done(); * assumes no completion interrupt was available. 
*/ ktime_t stop = ktime_add_ms(ktime_get(), timeout_ms); - spin_until_cond(scmi_xfer_done_no_timeout(cinfo, - xfer, stop)); - if (ktime_after(ktime_get(), stop)) { + spin_until_cond(scmi_xfer_done_no_timeout(cinfo, xfer, + stop, &ooo)); + if (!ooo && !info->desc->ops->poll_done(cinfo, xfer)) { dev_err(dev, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 3e8051fe829657..71e2a9a89f6ada 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1062,13 +1062,12 @@ static bool __init sdei_present_acpi(void) return true; } -void __init sdei_init(void) +void __init acpi_sdei_init(void) { struct platform_device *pdev; int ret; - ret = platform_driver_register(&sdei_driver); - if (ret || !sdei_present_acpi()) + if (!sdei_present_acpi()) return; pdev = platform_device_register_simple(sdei_driver.driver.name, @@ -1081,6 +1080,12 @@ void __init sdei_init(void) } } +static int __init sdei_init(void) +{ + return platform_driver_register(&sdei_driver); +} +arch_initcall(sdei_init); + int sdei_event_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index 0a883091259a2c..e3c2e38b746df9 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -6,14 +6,11 @@ config FW_CS_DSP config FW_CS_DSP_KUNIT_TEST_UTILS tristate - depends on KUNIT && REGMAP - select FW_CS_DSP config FW_CS_DSP_KUNIT_TEST tristate "KUnit tests for Cirrus Logic cs_dsp" if !KUNIT_ALL_TESTS - depends on KUNIT && REGMAP + depends on KUNIT && REGMAP && FW_CS_DSP default KUNIT_ALL_TESTS - select FW_CS_DSP select FW_CS_DSP_KUNIT_TEST_UTILS help This builds KUnit tests for cs_dsp. 
diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c index 161272e47bdabc..73412bcef50c50 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c @@ -461,36 +461,6 @@ unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *pri } EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_alg_base_in_words, "FW_CS_DSP_KUNIT_TEST_UTILS"); -/** - * cs_dsp_mock_xm_header_get_fw_version_from_regmap() - Firmware version. - * - * @priv: Pointer to struct cs_dsp_test. - * - * Return: Firmware version word value. - */ -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv) -{ - unsigned int xm = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); - union { - struct wmfw_id_hdr adsp2; - struct wmfw_v3_id_hdr halo; - } hdr; - - switch (priv->dsp->type) { - case WMFW_ADSP2: - regmap_raw_read(priv->dsp->regmap, xm, &hdr.adsp2, sizeof(hdr.adsp2)); - return be32_to_cpu(hdr.adsp2.ver); - case WMFW_HALO: - regmap_raw_read(priv->dsp->regmap, xm, &hdr.halo, sizeof(hdr.halo)); - return be32_to_cpu(hdr.halo.ver); - default: - KUNIT_FAIL(priv->test, NULL); - return 0; - } -} -EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_fw_version_from_regmap, - "FW_CS_DSP_KUNIT_TEST_UTILS"); - /** * cs_dsp_mock_xm_header_get_fw_version() - Firmware version. 
* diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_bin.c b/drivers/firmware/cirrus/test/cs_dsp_test_bin.c index 1e161bbc5b4a46..163b7faecff466 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_test_bin.c +++ b/drivers/firmware/cirrus/test/cs_dsp_test_bin.c @@ -2198,7 +2198,7 @@ static int cs_dsp_bin_test_common_init(struct kunit *test, struct cs_dsp *dsp) priv->local->bin_builder = cs_dsp_mock_bin_init(priv, 1, - cs_dsp_mock_xm_header_get_fw_version_from_regmap(priv)); + cs_dsp_mock_xm_header_get_fw_version(xm_hdr)); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->local->bin_builder); /* We must provide a dummy wmfw to load */ diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c b/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c index 8748874f055247..a7ec956d27249e 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c +++ b/drivers/firmware/cirrus/test/cs_dsp_test_bin_error.c @@ -451,7 +451,7 @@ static int cs_dsp_bin_err_test_common_init(struct kunit *test, struct cs_dsp *ds local->bin_builder = cs_dsp_mock_bin_init(priv, 1, - cs_dsp_mock_xm_header_get_fw_version_from_regmap(priv)); + cs_dsp_mock_xm_header_get_fw_version(local->xm_header)); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, local->bin_builder); /* Init cs_dsp */ diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index fd6dc790c5a89d..7aa2f9ad293562 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -601,6 +601,7 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, * @image: EFI loaded image protocol * @soft_limit: preferred address for loading the initrd * @hard_limit: upper limit address for loading the initrd + * @out: pointer to store the address of the initrd table * * Return: status code */ diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index a1ebbe9b73b136..38ca190d4a22d6 100644 --- a/drivers/firmware/psci/psci.c +++ 
b/drivers/firmware/psci/psci.c @@ -804,8 +804,10 @@ int __init psci_dt_init(void) np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np || !of_device_is_available(np)) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } init_fn = (psci_initcall_t)matched_np->data; ret = init_fn(np); diff --git a/drivers/firmware/samsung/exynos-acpm-pmic.c b/drivers/firmware/samsung/exynos-acpm-pmic.c index 85e90d236da21e..39b33a356ebd24 100644 --- a/drivers/firmware/samsung/exynos-acpm-pmic.c +++ b/drivers/firmware/samsung/exynos-acpm-pmic.c @@ -43,13 +43,13 @@ static inline u32 acpm_pmic_get_bulk(u32 data, unsigned int i) return (data >> (ACPM_PMIC_BULK_SHIFT * i)) & ACPM_PMIC_BULK_MASK; } -static void acpm_pmic_set_xfer(struct acpm_xfer *xfer, u32 *cmd, +static void acpm_pmic_set_xfer(struct acpm_xfer *xfer, u32 *cmd, size_t cmdlen, unsigned int acpm_chan_id) { xfer->txd = cmd; xfer->rxd = cmd; - xfer->txlen = sizeof(cmd); - xfer->rxlen = sizeof(cmd); + xfer->txlen = cmdlen; + xfer->rxlen = cmdlen; xfer->acpm_chan_id = acpm_chan_id; } @@ -71,7 +71,7 @@ int acpm_pmic_read_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_read_cmd(cmd, type, reg, chan); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -104,7 +104,7 @@ int acpm_pmic_bulk_read(const struct acpm_handle *handle, return -EINVAL; acpm_pmic_init_bulk_read_cmd(cmd, type, reg, chan, count); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -144,7 +144,7 @@ int acpm_pmic_write_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_write_cmd(cmd, type, reg, chan, value); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -184,7 +184,7 @@ 
int acpm_pmic_bulk_write(const struct acpm_handle *handle, return -EINVAL; acpm_pmic_init_bulk_write_cmd(cmd, type, reg, chan, count, buf); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) @@ -214,7 +214,7 @@ int acpm_pmic_update_reg(const struct acpm_handle *handle, int ret; acpm_pmic_init_update_cmd(cmd, type, reg, chan, value, mask); - acpm_pmic_set_xfer(&xfer, cmd, acpm_chan_id); + acpm_pmic_set_xfer(&xfer, cmd, sizeof(cmd), acpm_chan_id); ret = acpm_do_xfer(handle, &xfer); if (ret) diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c index a85b2dbdd9f0d7..e80cb7a8da8f23 100644 --- a/drivers/firmware/samsung/exynos-acpm.c +++ b/drivers/firmware/samsung/exynos-acpm.c @@ -184,6 +184,29 @@ struct acpm_match_data { #define client_to_acpm_chan(c) container_of(c, struct acpm_chan, cl) #define handle_to_acpm_info(h) container_of(h, struct acpm_info, handle) +/** + * acpm_get_saved_rx() - get the response if it was already saved. + * @achan: ACPM channel info. + * @xfer: reference to the transfer to get response for. + * @tx_seqnum: xfer TX sequence number. + */ +static void acpm_get_saved_rx(struct acpm_chan *achan, + const struct acpm_xfer *xfer, u32 tx_seqnum) +{ + const struct acpm_rx_data *rx_data = &achan->rx_data[tx_seqnum - 1]; + u32 rx_seqnum; + + if (!rx_data->response) + return; + + rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, rx_data->cmd[0]); + + if (rx_seqnum == tx_seqnum) { + memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); + clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); + } +} + /** * acpm_get_rx() - get response from RX queue. * @achan: ACPM channel info. @@ -204,15 +227,16 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) rx_front = readl(achan->rx.front); i = readl(achan->rx.rear); - /* Bail out if RX is empty. 
*/ - if (i == rx_front) + tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); + + if (i == rx_front) { + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; + } base = achan->rx.base; mlen = achan->mlen; - tx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]); - /* Drain RX queue. */ do { /* Read RX seqnum. */ @@ -259,16 +283,8 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer) * If the response was not in this iteration of the queue, check if the * RX data was previously saved. */ - rx_data = &achan->rx_data[tx_seqnum - 1]; - if (!rx_set && rx_data->response) { - rx_seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, - rx_data->cmd[0]); - - if (rx_seqnum == tx_seqnum) { - memcpy(xfer->rxd, rx_data->cmd, xfer->rxlen); - clear_bit(rx_seqnum - 1, achan->bitmap_seqnum); - } - } + if (!rx_set) + acpm_get_saved_rx(achan, xfer, tx_seqnum); return 0; } @@ -680,24 +696,17 @@ static const struct acpm_handle *acpm_get_by_phandle(struct device *dev, return ERR_PTR(-ENODEV); pdev = of_find_device_by_node(acpm_np); - if (!pdev) { - dev_err(dev, "Cannot find device node %s\n", acpm_np->name); - of_node_put(acpm_np); - return ERR_PTR(-EPROBE_DEFER); - } - of_node_put(acpm_np); + if (!pdev) + return ERR_PTR(-EPROBE_DEFER); acpm = platform_get_drvdata(pdev); if (!acpm) { - dev_err(dev, "Cannot get drvdata from %s\n", - dev_name(&pdev->dev)); platform_device_put(pdev); return ERR_PTR(-EPROBE_DEFER); } if (!try_module_get(pdev->dev.driver->owner)) { - dev_err(dev, "Cannot get module reference.\n"); platform_device_put(pdev); return ERR_PTR(-EPROBE_DEFER); } diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index 5767aed25cdc05..a123c05cbc9e66 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -95,7 +95,7 @@ void __init kvm_arm_target_impl_cpu_init(void) for (i = 0; i < max_cpus; i++) { arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID, - i, &res); + i, 0, 
0, &res); if (res.a0 != SMCCC_RET_SUCCESS) { pr_warn("Discovering target implementation CPUs failed\n"); goto mem_free; @@ -103,7 +103,7 @@ void __init kvm_arm_target_impl_cpu_init(void) target[i].midr = res.a1; target[i].revidr = res.a2; target[i].aidr = res.a3; - }; + } if (!cpu_errata_set_target_impl(max_cpus, target)) { pr_warn("Failed to set target implementation CPUs\n"); diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 3c52cb73237a43..e3f990d888d718 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -1224,22 +1224,28 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev) if (!svc->intel_svc_fcs) { dev_err(dev, "failed to allocate %s device\n", INTEL_FCS); ret = -ENOMEM; - goto err_unregister_dev; + goto err_unregister_rsu_dev; } ret = platform_device_add(svc->intel_svc_fcs); if (ret) { platform_device_put(svc->intel_svc_fcs); - goto err_unregister_dev; + goto err_unregister_rsu_dev; } + ret = of_platform_default_populate(dev_of_node(dev), NULL, dev); + if (ret) + goto err_unregister_fcs_dev; + dev_set_drvdata(dev, svc); pr_info("Intel Service Layer Driver Initialized\n"); return 0; -err_unregister_dev: +err_unregister_fcs_dev: + platform_device_unregister(svc->intel_svc_fcs); +err_unregister_rsu_dev: platform_device_unregister(svc->stratix10_svc_rsu); err_free_kfifo: kfifo_free(&controller->svc_fifo); @@ -1253,6 +1259,8 @@ static void stratix10_svc_drv_remove(struct platform_device *pdev) struct stratix10_svc *svc = dev_get_drvdata(&pdev->dev); struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev); + of_platform_depopulate(ctrl->dev); + platform_device_unregister(svc->intel_svc_fcs); platform_device_unregister(svc->stratix10_svc_rsu); diff --git a/drivers/fpga/tests/fpga-bridge-test.c b/drivers/fpga/tests/fpga-bridge-test.c index b9ab29809e9619..124ba40e32b18c 100644 --- a/drivers/fpga/tests/fpga-bridge-test.c +++ b/drivers/fpga/tests/fpga-bridge-test.c @@ 
-170,4 +170,5 @@ static struct kunit_suite fpga_bridge_suite = { kunit_test_suite(fpga_bridge_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Bridge"); MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/tests/fpga-mgr-test.c b/drivers/fpga/tests/fpga-mgr-test.c index 9cb37aefbac4b2..62975a39ee14e4 100644 --- a/drivers/fpga/tests/fpga-mgr-test.c +++ b/drivers/fpga/tests/fpga-mgr-test.c @@ -263,6 +263,7 @@ static void fpga_mgr_test_img_load_sgt(struct kunit *test) img_buf = init_test_buffer(test, IMAGE_SIZE); sgt = kunit_kzalloc(test, sizeof(*sgt), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); ret = sg_alloc_table(sgt, 1, GFP_KERNEL); KUNIT_ASSERT_EQ(test, ret, 0); sg_init_one(sgt->sgl, img_buf, IMAGE_SIZE); @@ -330,4 +331,5 @@ static struct kunit_suite fpga_mgr_suite = { kunit_test_suite(fpga_mgr_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Manager"); MODULE_LICENSE("GPL"); diff --git a/drivers/fpga/tests/fpga-region-test.c b/drivers/fpga/tests/fpga-region-test.c index 6a108cafded863..020ceac48509fa 100644 --- a/drivers/fpga/tests/fpga-region-test.c +++ b/drivers/fpga/tests/fpga-region-test.c @@ -214,4 +214,5 @@ static struct kunit_suite fpga_region_suite = { kunit_test_suite(fpga_region_suite); +MODULE_DESCRIPTION("KUnit test for the FPGA Region"); MODULE_LICENSE("GPL"); diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c index cb1ac9c4023938..bc6378506296cd 100644 --- a/drivers/fwctl/main.c +++ b/drivers/fwctl/main.c @@ -105,7 +105,7 @@ static int fwctl_cmd_rpc(struct fwctl_ucmd *ucmd) if (!test_and_set_bit(0, &fwctl_tainted)) { dev_warn( &fwctl->dev, - "%s(%d): has requested full access to the physical device device", + "%s(%d): has requested full access to the physical device", current->comm, task_pid_nr(current)); add_taint(TAINT_FWCTL, LOCKDEP_STILL_OK); } diff --git a/drivers/fwctl/pds/main.c b/drivers/fwctl/pds/main.c index 284c4165fdd435..9b9d1f6b55566c 100644 --- a/drivers/fwctl/pds/main.c +++ b/drivers/fwctl/pds/main.c @@ -105,12 
+105,14 @@ static int pdsfc_identify(struct pdsfc_dev *pdsfc) static void pdsfc_free_endpoints(struct pdsfc_dev *pdsfc) { struct device *dev = &pdsfc->fwctl.dev; + u32 num_endpoints; int i; if (!pdsfc->endpoints) return; - for (i = 0; pdsfc->endpoint_info && i < pdsfc->endpoints->num_entries; i++) + num_endpoints = le32_to_cpu(pdsfc->endpoints->num_entries); + for (i = 0; pdsfc->endpoint_info && i < num_endpoints; i++) mutex_destroy(&pdsfc->endpoint_info[i].lock); vfree(pdsfc->endpoint_info); pdsfc->endpoint_info = NULL; @@ -199,7 +201,7 @@ static int pdsfc_init_endpoints(struct pdsfc_dev *pdsfc) ep_entry = (struct pds_fwctl_query_data_endpoint *)pdsfc->endpoints->entries; for (i = 0; i < num_endpoints; i++) { mutex_init(&pdsfc->endpoint_info[i].lock); - pdsfc->endpoint_info[i].endpoint = ep_entry[i].id; + pdsfc->endpoint_info[i].endpoint = le32_to_cpu(ep_entry[i].id); } return 0; @@ -214,6 +216,7 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc struct pds_fwctl_query_data *data; union pds_core_adminq_cmd cmd; dma_addr_t data_pa; + u32 num_entries; int err; int i; @@ -246,8 +249,9 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc *pa = data_pa; entries = (struct pds_fwctl_query_data_operation *)data->entries; - dev_dbg(dev, "num_entries %d\n", data->num_entries); - for (i = 0; i < data->num_entries; i++) { + num_entries = le32_to_cpu(data->num_entries); + dev_dbg(dev, "num_entries %d\n", num_entries); + for (i = 0; i < num_entries; i++) { /* Translate FW command attribute to fwctl scope */ switch (entries[i].scope) { @@ -267,7 +271,7 @@ static struct pds_fwctl_query_data *pdsfc_get_operations(struct pdsfc_dev *pdsfc break; } dev_dbg(dev, "endpoint %d operation: id %x scope %d\n", - ep, entries[i].id, entries[i].scope); + ep, le32_to_cpu(entries[i].id), entries[i].scope); } return data; @@ -280,24 +284,26 @@ static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc, struct 
pds_fwctl_query_data_operation *op_entry; struct pdsfc_rpc_endpoint_info *ep_info = NULL; struct device *dev = &pdsfc->fwctl.dev; + u32 num_entries; int i; /* validate rpc in_len & out_len based * on ident.max_req_sz & max_resp_sz */ - if (rpc->in.len > pdsfc->ident.max_req_sz) { + if (rpc->in.len > le32_to_cpu(pdsfc->ident.max_req_sz)) { dev_dbg(dev, "Invalid request size %u, max %u\n", - rpc->in.len, pdsfc->ident.max_req_sz); + rpc->in.len, le32_to_cpu(pdsfc->ident.max_req_sz)); return -EINVAL; } - if (rpc->out.len > pdsfc->ident.max_resp_sz) { + if (rpc->out.len > le32_to_cpu(pdsfc->ident.max_resp_sz)) { dev_dbg(dev, "Invalid response size %u, max %u\n", - rpc->out.len, pdsfc->ident.max_resp_sz); + rpc->out.len, le32_to_cpu(pdsfc->ident.max_resp_sz)); return -EINVAL; } - for (i = 0; i < pdsfc->endpoints->num_entries; i++) { + num_entries = le32_to_cpu(pdsfc->endpoints->num_entries); + for (i = 0; i < num_entries; i++) { if (pdsfc->endpoint_info[i].endpoint == rpc->in.ep) { ep_info = &pdsfc->endpoint_info[i]; break; @@ -326,8 +332,9 @@ static int pdsfc_validate_rpc(struct pdsfc_dev *pdsfc, /* reject unsupported and/or out of scope commands */ op_entry = (struct pds_fwctl_query_data_operation *)ep_info->operations->entries; - for (i = 0; i < ep_info->operations->num_entries; i++) { - if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, op_entry[i].id)) { + num_entries = le32_to_cpu(ep_info->operations->num_entries); + for (i = 0; i < num_entries; i++) { + if (PDS_FWCTL_RPC_OPCODE_CMP(rpc->in.op, le32_to_cpu(op_entry[i].id))) { if (scope < op_entry[i].scope) return -EPERM; return 0; @@ -402,7 +409,7 @@ static void *pdsfc_fw_rpc(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, cmd = (union pds_core_adminq_cmd) { .fwctl_rpc = { .opcode = PDS_FWCTL_CMD_RPC, - .flags = PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP, + .flags = cpu_to_le16(PDS_FWCTL_RPC_IND_REQ | PDS_FWCTL_RPC_IND_RESP), .ep = cpu_to_le32(rpc->in.ep), .op = cpu_to_le32(rpc->in.op), .req_pa = 
cpu_to_le64(in_payload_dma_addr), diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index b5f0a7a2e1bf14..4b70cbaa1caacd 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -186,3 +186,37 @@ their hardware offsets within the chip. Encourage users to switch to using them and eventually remove the existing global export/unexport attribues. + +------------------------------------------------------------------------------- + +Remove GPIOD_FLAGS_BIT_NONEXCLUSIVE + +GPIOs in the linux kernel are meant to be an exclusive resource. This means +that the GPIO descriptors (the software representation of the hardware concept) +are not reference counted and - in general - only one user at a time can +request a GPIO line and control its settings. The consumer API is designed +around full control of the line's state as evidenced by the fact that, for +instance, gpiod_set_value() does indeed drive the line as requested, instead +of bumping an enable counter of some sort. + +A problematic use-case for GPIOs is when two consumers want to use the same +descriptor independently. An example of such a user is the regulator subsystem +which may instantiate several struct regulator_dev instances containing +a struct device but using the same enable GPIO line. + +A workaround was introduced in the form of the GPIOD_FLAGS_BIT_NONEXCLUSIVE +flag but its implementation is problematic: it does not provide any +synchronization of usage nor did it introduce any enable count meaning the +non-exclusive users of the same descriptor will in fact "fight" for the +control over it. This flag should be removed and replaced with a better +solution, possibly based on the new power sequencing subsystem. + +------------------------------------------------------------------------------- + +Remove devm_gpiod_unhinge() + +devm_gpiod_unhinge() is provided as a way to transfer the ownership of managed +enable GPIOs to the regulator core. 
Rather than doing that however, we should +make it possible for the regulator subsystem to deal with GPIO resources the +lifetime of which it doesn't control as logically, a GPIO obtained by a caller +should also be freed by it. diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 0cd4c36ae8aaf0..5415175364899e 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -410,7 +410,9 @@ static int mpc8xxx_probe(struct platform_device *pdev) goto err; } - device_init_wakeup(dev, true); + ret = devm_device_init_wakeup(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to init wakeup\n"); return 0; err: diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 442435ded020ac..13cc120cf11f14 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1204,6 +1204,8 @@ static int pca953x_restore_context(struct pca953x_chip *chip) guard(mutex)(&chip->i2c_lock); + if (chip->client->irq > 0) + enable_irq(chip->client->irq); regcache_cache_only(chip->regmap, false); regcache_mark_dirty(chip->regmap); ret = pca953x_regcache_sync(chip); @@ -1216,6 +1218,10 @@ static int pca953x_restore_context(struct pca953x_chip *chip) static void pca953x_save_context(struct pca953x_chip *chip) { guard(mutex)(&chip->i2c_lock); + + /* Disable IRQ to prevent early triggering while regmap "cache only" is on */ + if (chip->client->irq > 0) + disable_irq(chip->client->irq); regcache_cache_only(chip->regmap, true); } diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 6895b65c86aff5..d27bfac6c9f53d 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -823,6 +823,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev) struct gpio_irq_chip *irq; struct tegra_gpio *gpio; struct device_node *np; + struct resource *res; char **names; int err; @@ -842,19 +843,19 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->num_banks++; /* get 
register apertures */ - gpio->secure = devm_platform_ioremap_resource_byname(pdev, "security"); - if (IS_ERR(gpio->secure)) { - gpio->secure = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(gpio->secure)) - return PTR_ERR(gpio->secure); - } - - gpio->base = devm_platform_ioremap_resource_byname(pdev, "gpio"); - if (IS_ERR(gpio->base)) { - gpio->base = devm_platform_ioremap_resource(pdev, 1); - if (IS_ERR(gpio->base)) - return PTR_ERR(gpio->base); - } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "security"); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gpio->secure = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(gpio->secure)) + return PTR_ERR(gpio->secure); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "gpio"); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + gpio->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(gpio->base)) + return PTR_ERR(gpio->base); err = platform_irq_count(pdev); if (err < 0) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 13407fd4f0ebe8..eab6726953b411 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -401,10 +401,15 @@ static ssize_t gpio_virtuser_direction_do_write(struct file *file, char buf[32], *trimmed; int ret, dir, val = 0; - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (count >= sizeof(buf)) + return -EINVAL; + + ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); if (ret < 0) return ret; + buf[ret] = '\0'; + trimmed = strim(buf); if (strcmp(trimmed, "input") == 0) { @@ -623,12 +628,15 @@ static ssize_t gpio_virtuser_consumer_write(struct file *file, char buf[GPIO_VIRTUSER_NAME_BUF_LEN + 2]; int ret; + if (count >= sizeof(buf)) + return -EINVAL; + ret = simple_write_to_buffer(buf, GPIO_VIRTUSER_NAME_BUF_LEN, ppos, user_buf, count); if (ret < 0) return ret; - buf[strlen(buf) - 1] = '\0'; + buf[ret] = '\0'; ret = 
gpiod_set_consumer_name(data->ad.desc, buf); if (ret) diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index be81fa2b17abc6..3dae63f3ea2177 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -1011,6 +1011,7 @@ static void zynq_gpio_remove(struct platform_device *pdev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) dev_warn(&pdev->dev, "pm_runtime_get_sync() Failed\n"); + device_init_wakeup(&pdev->dev, 0); gpiochip_remove(&gpio->chip); device_set_wakeup_capable(&pdev->dev, 0); pm_runtime_disable(&pdev->dev); diff --git a/drivers/gpio/gpiolib-devres.c b/drivers/gpio/gpiolib-devres.c index 08205f355cebe5..120d1ec5af3bd6 100644 --- a/drivers/gpio/gpiolib-devres.c +++ b/drivers/gpio/gpiolib-devres.c @@ -317,11 +317,15 @@ EXPORT_SYMBOL_GPL(devm_gpiod_put); * @dev: GPIO consumer * @desc: GPIO descriptor to remove resource management from * + * *DEPRECATED* + * This function should not be used. It's been provided as a workaround for + * resource ownership issues in the regulator framework and should be replaced + * with a better solution. + * * Remove resource management from a GPIO descriptor. This is needed when * you want to hand over lifecycle management of a descriptor to another * mechanism. */ - void devm_gpiod_unhinge(struct device *dev, struct gpio_desc *desc) { int ret; diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index eb667f8f1ead4f..65f6a7177b78ef 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -193,6 +193,8 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np, */ { "himax,hx8357", "gpios-reset", false }, { "himax,hx8369", "gpios-reset", false }, +#endif +#if IS_ENABLED(CONFIG_MTD_NAND_JZ4780) /* * The rb-gpios semantics was undocumented and qi,lb60 (along with * the ingenic driver) got it wrong. 
The active state encodes the @@ -266,6 +268,9 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np, { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, #endif +#if IS_ENABLED(CONFIG_MMC_ATMELMCI) + { "atmel,hsmci", "cd-gpios", "cd-inverted" }, +#endif #if IS_ENABLED(CONFIG_PCI_IMX6) { "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" }, { "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" }, @@ -291,9 +296,6 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np, #if IS_ENABLED(CONFIG_REGULATOR_GPIO) { "regulator-gpio", "enable-gpio", "enable-active-high" }, { "regulator-gpio", "enable-gpios", "enable-active-high" }, -#endif -#if IS_ENABLED(CONFIG_MMC_ATMELMCI) - { "atmel,hsmci", "cd-gpios", "cd-inverted" }, #endif }; unsigned int i; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b8197502a5ac59..113c5d90f2df46 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -742,6 +742,12 @@ EXPORT_SYMBOL_GPL(gpiochip_query_valid_mask); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset) { + /* + * hog pins are requested before registering GPIO chip + */ + if (!gc->gpiodev) + return true; + /* No mask means all valid */ if (likely(!gc->gpiodev->valid_mask)) return true; @@ -2879,7 +2885,7 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. 
*/ - if (!guard.gc->set && !guard.gc->direction_output) { + if (!guard.gc->set && !guard.gc->set_rv && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2cba2b6ebe1c11..f01925ed8176b5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -188,7 +188,7 @@ config DRM_DEBUG_DP_MST_TOPOLOGY_REFS bool "Enable refcount backtrace history in the DP MST helpers" depends on STACKTRACE_SUPPORT select STACKDEPOT - depends on DRM_KMS_HELPER + select DRM_KMS_HELPER depends on DEBUG_KERNEL depends on EXPERT help diff --git a/drivers/gpu/drm/Kconfig.debug b/drivers/gpu/drm/Kconfig.debug new file mode 100644 index 00000000000000..c493743e8aca0e --- /dev/null +++ b/drivers/gpu/drm/Kconfig.debug @@ -0,0 +1,116 @@ +config DRM_USE_DYNAMIC_DEBUG + bool "use dynamic debug to implement drm.debug" + default n + depends on BROKEN + depends on DRM + depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE + depends on JUMP_LABEL + help + Use dynamic-debug to avoid drm_debug_enabled() runtime overheads. + Due to callsite counts in DRM drivers (~4k in amdgpu) and 56 + bytes per callsite, the .data costs can be substantial, and + are therefore configurable. + +config DRM_WERROR + bool "Compile the drm subsystem with warnings as errors" + depends on DRM && EXPERT + depends on !WERROR + default n + help + A kernel build should not cause any compiler warnings, and this + enables the '-Werror' flag to enforce that rule in the drm subsystem. + + The drm subsystem enables more warnings than the kernel default, so + this config option is disabled by default. + + If in doubt, say N. 
+ +config DRM_HEADER_TEST + bool "Ensure DRM headers are self-contained and pass kernel-doc" + depends on DRM && EXPERT + default n + help + Ensure the DRM subsystem headers both under drivers/gpu/drm and + include/drm compile, are self-contained, have header guards, and have + no kernel-doc warnings. + + If in doubt, say N. + +config DRM_DEBUG_MM + bool "Insert extra checks and debug info into the DRM range managers" + default n + depends on DRM + depends on STACKTRACE_SUPPORT + select STACKDEPOT + help + Enable allocation tracking of memory manager and leak detection on + shutdown. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_KUNIT_TEST_HELPERS + tristate + depends on DRM && KUNIT + select DRM_KMS_HELPER + help + KUnit Helpers for KMS drivers. + +config DRM_KUNIT_TEST + tristate "KUnit tests for DRM" if !KUNIT_ALL_TESTS + depends on DRM && KUNIT && MMU + select DRM_BRIDGE_CONNECTOR + select DRM_BUDDY + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HDMI_STATE_HELPER + select DRM_DISPLAY_HELPER + select DRM_EXEC + select DRM_EXPORT_FOR_TESTS if m + select DRM_GEM_SHMEM_HELPER + select DRM_KUNIT_TEST_HELPERS + select DRM_LIB_RANDOM + select PRIME_NUMBERS + default KUNIT_ALL_TESTS + help + This builds unit tests for DRM. This option is not useful for + distributions or general kernels, but only for kernel + developers working on DRM and associated drivers. + + For more information on KUnit and unit tests in general, + please refer to the KUnit documentation in + Documentation/dev-tools/kunit/. + + If in doubt, say "N". + +config DRM_TTM_KUNIT_TEST + tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS + default n + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) + select DRM_TTM + select DRM_BUDDY + select DRM_EXPORT_FOR_TESTS if m + select DRM_KUNIT_TEST_HELPERS + default KUNIT_ALL_TESTS + help + Enables unit tests for TTM, a GPU memory manager subsystem used + to manage memory buffers. 
This option is mostly useful for kernel + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. + + If in doubt, say "N". + +config DRM_SCHED_KUNIT_TEST + tristate "KUnit tests for the DRM scheduler" if !KUNIT_ALL_TESTS + select DRM_SCHED + depends on DRM && KUNIT + default KUNIT_ALL_TESTS + help + Choose this option to build unit tests for the DRM scheduler. + + Recommended for driver developers only. + + If in doubt, say "N". + +config DRM_EXPORT_FOR_TESTS + bool diff --git a/drivers/gpu/drm/adp/adp_drv.c b/drivers/gpu/drm/adp/adp_drv.c index c98c647f981d53..54cde090c3f42a 100644 --- a/drivers/gpu/drm/adp/adp_drv.c +++ b/drivers/gpu/drm/adp/adp_drv.c @@ -121,7 +121,6 @@ struct adp_drv_private { dma_addr_t mask_iova; int be_irq; int fe_irq; - spinlock_t irq_lock; struct drm_pending_vblank_event *event; }; @@ -288,6 +287,7 @@ static void adp_crtc_atomic_enable(struct drm_crtc *crtc, writel(BIT(0), adp->be + ADBE_BLEND_EN3); writel(BIT(0), adp->be + ADBE_BLEND_BYPASS); writel(BIT(0), adp->be + ADBE_BLEND_EN4); + drm_crtc_vblank_on(crtc); } static void adp_crtc_atomic_disable(struct drm_crtc *crtc, @@ -310,6 +310,7 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { u32 frame_num = 1; + unsigned long flags; struct adp_drv_private *adp = crtc_to_adp(crtc); struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state, crtc); u64 new_size = ALIGN(new_state->mode.hdisplay * @@ -330,13 +331,19 @@ static void adp_crtc_atomic_flush(struct drm_crtc *crtc, } writel(ADBE_FIFO_SYNC | frame_num, adp->be + ADBE_FIFO); //FIXME: use adbe flush interrupt - spin_lock_irq(&crtc->dev->event_lock); if (crtc->state->event) { - drm_crtc_vblank_get(crtc); - adp->event = crtc->state->event; + struct drm_pending_vblank_event *event = crtc->state->event; + + crtc->state->event = NULL; + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + if (drm_crtc_vblank_get(crtc) 
!= 0) + drm_crtc_send_vblank_event(crtc, event); + else + adp->event = event; + + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } - crtc->state->event = NULL; - spin_unlock_irq(&crtc->dev->event_lock); } static const struct drm_crtc_funcs adp_crtc_funcs = { @@ -482,8 +489,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) u32 int_status; u32 int_ctl; - spin_lock(&adp->irq_lock); - int_status = readl(adp->fe + ADP_INT_STATUS); if (int_status & ADP_INT_STATUS_VBLANK) { drm_crtc_handle_vblank(&adp->crtc); @@ -501,7 +506,6 @@ static irqreturn_t adp_fe_irq(int irq, void *arg) writel(int_status, adp->fe + ADP_INT_STATUS); - spin_unlock(&adp->irq_lock); return IRQ_HANDLED; } @@ -512,8 +516,7 @@ static int adp_drm_bind(struct device *dev) struct adp_drv_private *adp = to_adp(drm); int err; - adp_disable_vblank(adp); - writel(ADP_CTRL_FIFO_ON | ADP_CTRL_VBLANK_ON, adp->fe + ADP_CTRL); + writel(ADP_CTRL_FIFO_ON, adp->fe + ADP_CTRL); adp->next_bridge = drmm_of_get_bridge(&adp->drm, dev->of_node, 0, 0); if (IS_ERR(adp->next_bridge)) { @@ -567,8 +570,6 @@ static int adp_probe(struct platform_device *pdev) if (IS_ERR(adp)) return PTR_ERR(adp); - spin_lock_init(&adp->irq_lock); - dev_set_drvdata(&pdev->dev, &adp->drm); err = adp_parse_of(pdev, adp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6d83ccfa42eeb0..ce1072fe492187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -161,6 +161,7 @@ struct amdgpu_watchdog_timer { */ extern int amdgpu_modeset; extern unsigned int amdgpu_vram_limit; +extern int amdgpu_ignore_min_pcap; extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; @@ -353,7 +354,6 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; -#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 
5ms */ #define MAX_KIQ_REG_TRY 1000 @@ -1124,6 +1124,7 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + suspend_state_t last_suspend_state; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; @@ -1614,11 +1615,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143dd1..707e131f89d237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82df06a72ee025..e18e180bf32c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1108,7 +1108,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (p->gang_size > 1 && 
!adev->vm_manager.concurrent_flush) { for (i = 0; i < p->gang_size; ++i) { struct drm_sched_entity *entity = p->entities[i]; - struct drm_gpu_scheduler *sched = entity->rq->sched; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); struct amdgpu_ring *ring = to_amdgpu_ring(sched); if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) @@ -1236,7 +1237,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - sched = p->gang_leader->base.entity->rq->sched; + sched = container_of(p->gang_leader->base.entity->rq, typeof(*sched), + rq); while ((fence = amdgpu_sync_get_fence(&p->sync))) { struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index cfdf558b48b648..02138aa557935e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a30111d2c3ea0e..f8b3e04d71eda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3510,6 +3510,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); amdgpu_seq64_fini(adev); + amdgpu_doorbell_fini(adev); } if (adev->ip_blocks[i].version->funcs->sw_fini) { r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); @@ -3643,6 +3644,13 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) adev, adev->ip_blocks[i].version->type)) continue; + /* Since we skip suspend for S0i3, we need to cancel 
the delayed + * idle work here as the suspend callback never gets called. + */ + if (adev->in_s0ix && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) + cancel_delayed_work_sync(&adev->gfx.idle_work); /* skip suspend of gfx/mes and psp for S0ix * gfx is in gfxoff state, so on resume it will exit gfxoff just * like at runtime. PSP is also part of the always on hardware @@ -4851,7 +4859,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } @@ -4900,28 +4907,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. 
- * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } @@ -4942,15 +4941,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4961,15 +4958,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index dc2713ec95a5bd..9e738fae2b74f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -120,6 +120,8 @@ MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 9f627caedc3f61..44e120f9f76497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -43,6 +43,29 @@ #include #include +static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops; + 
+/** + * dma_buf_attach_adev - Helper to get adev of an attachment + * + * @attach: attachment + * + * Returns: + * A struct amdgpu_device * if the attaching device is an amdgpu device or + * partition, NULL otherwise. + */ +static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach) +{ + if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) { + struct drm_gem_object *obj = attach->importer_priv; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + return amdgpu_ttm_adev(bo->tbo.bdev); + } + + return NULL; +} + /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation * @@ -54,11 +77,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach) { + struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach); struct drm_gem_object *obj = dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && + pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; amdgpu_vm_bo_update_shared(bo); @@ -75,11 +100,35 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, */ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { - struct drm_gem_object *obj = attach->dmabuf->priv; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct dma_buf *dmabuf = attach->dmabuf; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv); + u32 domains = bo->allowed_domains; + + dma_resv_assert_held(dmabuf->resv); + + /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP + * support if all attachments can do P2P. If any attachment can't do + * P2P just pin into GTT instead. + * + * To avoid conflicting pinnings between GPUs and RDMA when move + * notifiers are disabled, only allow pinning in VRAM when move + * notifiers are enabled.
+ */ + if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) { + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } else { + list_for_each_entry(attach, &dmabuf->attachments, node) + if (!attach->peer2peer) + domains &= ~AMDGPU_GEM_DOMAIN_VRAM; + } - /* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (domains & AMDGPU_GEM_DOMAIN_VRAM) + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + if (WARN_ON(!domains)) + return -EINVAL; + + return amdgpu_bo_pin(bo, domains); } /** @@ -134,9 +183,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return ERR_PTR(r); - - } else if (bo->tbo.resource->mem_type != TTM_PL_TT) { - return ERR_PTR(-EBUSY); } switch (bo->tbo.resource->mem_type) { @@ -153,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, break; case TTM_PL_VRAM: + /* XGMI-accessible memory should never be DMA-mapped */ + if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible( + dma_buf_attach_adev(attach), bo))) + return ERR_PTR(-EINVAL); + r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0, bo->tbo.base.size, attach->dev, dir, &sgt); @@ -184,7 +235,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - if (sgt->sgl->page_link) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); @@ -459,6 +510,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct drm_gem_object *obj = &bo->tbo.base; struct drm_gem_object *gobj; + if (!adev) + return false; + if (obj->import_attach) { struct dma_buf *dma_buf = obj->import_attach->dmabuf; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 26bf896f1444ed..6ef3fedc123321 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -143,6 +143,7 @@ enum 
AMDGPU_DEBUG_MASK { }; unsigned int amdgpu_vram_limit = UINT_MAX; +int amdgpu_ignore_min_pcap = 0; /* do not ignore by default */ int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ @@ -262,6 +263,15 @@ struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { .period = 0x0, /* default to 0x0 (timeout disable) */ }; +/** + * DOC: ignore_min_pcap (int) + * Ignore the minimum power cap. + * Useful on graphics cards where the minimum power cap is very high. + * The default is 0 (Do not ignore). + */ +MODULE_PARM_DESC(ignore_min_pcap, "Ignore the minimum power cap"); +module_param_named(ignore_min_pcap, amdgpu_ignore_min_pcap, int, 0600); + /** * DOC: vramlimit (int) * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM). @@ -2548,8 +2558,20 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) adev->in_s3 = true; - if (!adev->in_s0ix && !adev->in_s3) + if (!adev->in_s0ix && !adev->in_s3) { + /* don't allow going deep first time followed by s2idle the next time */ + if (adev->last_suspend_state != PM_SUSPEND_ON && + adev->last_suspend_state != pm_suspend_target_state) { + drm_err_once(drm_dev, "Unsupported suspend state %d\n", + pm_suspend_target_state); + return -EINVAL; + } return 0; + } + + /* cache the state last used for suspend */ + adev->last_suspend_state = pm_suspend_target_state; + return amdgpu_device_suspend(drm_dev, true); } @@ -2603,13 +2625,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2622,9 +2639,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) 
static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 72af5e5a894a29..cf2df7790077d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1438,9 +1438,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; struct drm_gpu_scheduler *sched = &ring->sched; struct drm_sched_entity entity; + static atomic_t counter; struct dma_fence *f; struct amdgpu_job *job; struct amdgpu_ib *ib; + void *owner; int i, r; /* Initialize the scheduler entity */ @@ -1451,9 +1453,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring) goto err; } - r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL, - 64, 0, - &job); + /* + * Use some unique dummy value as the owner to make sure we execute + * the cleaner shader on each submission. The value just needs to change + * for each submission and is otherwise meaningless. + */ + owner = (void *)(unsigned long)atomic_inc_return(&counter); + + r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner, + 64, 0, &job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 464625282872aa..ecb74ccf1d9081 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -699,12 +699,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst) { - u32 usec_timeout = amdgpu_sriov_vf(adev) ?
SRIOV_USEC_TIMEOUT : - adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; unsigned int ndw; - int r; + int r, cnt = 0; uint32_t seq; /* @@ -761,10 +759,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq[inst].ring_lock); - if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) { + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) { dev_err(adev->dev, "timeout waiting for kiq fence\n"); r = -ETIME; - } + } else + r = 0; } error_unlock_reset: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce5d..e3d4f750373884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -359,7 +359,9 @@ static struct dma_fence * amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity) { - struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); + struct drm_gpu_scheduler *sched = + container_of(s_entity->rq, typeof(*sched), rq); + struct amdgpu_ring *ring = to_amdgpu_ring(sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); struct dma_fence *fence; int r; @@ -459,25 +461,22 @@ drm_sched_entity_queue_pop(struct drm_sched_entity *entity) void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) { + struct drm_sched_rq *rq = &sched->rq; + struct drm_sched_entity *s_entity; struct drm_sched_job *s_job; - struct drm_sched_entity *s_entity = NULL; - int i; /* Signal all jobs not yet scheduled */ - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - 
spin_lock(&rq->lock); - list_for_each_entry(s_entity, &rq->entities, list) { - while ((s_job = drm_sched_entity_queue_pop(s_entity))) { - struct drm_sched_fence *s_fence = s_job->s_fence; - - dma_fence_signal(&s_fence->scheduled); - dma_fence_set_error(&s_fence->finished, -EHWPOISON); - dma_fence_signal(&s_fence->finished); - } + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = drm_sched_entity_queue_pop(s_entity))) { + struct drm_sched_fence *s_fence = s_job->s_fence; + + dma_fence_signal(&s_fence->scheduled); + dma_fence_set_error(&s_fence->finished, -EHWPOISON); + dma_fence_signal(&s_fence->finished); } - spin_unlock(&rq->lock); } + spin_unlock(&rq->lock); /* Signal all jobs already scheduled to HW */ list_for_each_entry(s_job, &sched->pending_list, list) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ce6b9ba967fff0..d6872baeba1e34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -85,7 +85,10 @@ struct amdgpu_job { static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job) { - return to_amdgpu_ring(job->base.entity->rq->sched); + struct drm_gpu_scheduler *sched = + container_of(job->base.entity->rq, typeof(*sched), rq); + + return to_amdgpu_ring(sched); } int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 80cd6f5273db3a..0b9987781f7622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -163,8 +163,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * When GTT is just an alternative to VRAM make sure that we * only use it as fallback and still try to fill up VRAM first. 
*/ - if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM && - !(adev->flags & AMD_IS_APU)) + if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) && + domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 11dd2e0f797964..197d20a37afb5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -145,6 +145,7 @@ TRACE_EVENT(amdgpu_cs, struct amdgpu_ib *ib), TP_ARGS(p, job, ib), TP_STRUCT__entry( + __field(struct drm_gpu_scheduler *, sched) __field(struct amdgpu_bo_list *, bo_list) __field(u32, ring) __field(u32, dw) @@ -152,11 +153,14 @@ TRACE_EVENT(amdgpu_cs, ), TP_fast_assign( + __entry->sched = container_of(job->base.entity->rq, + typeof(*__entry->sched), + rq); __entry->bo_list = p->bo_list; - __entry->ring = to_amdgpu_ring(job->base.entity->rq->sched)->idx; + __entry->ring = to_amdgpu_ring(__entry->sched)->idx; __entry->dw = ib->length_dw; __entry->fences = amdgpu_fence_count_emitted( - to_amdgpu_ring(job->base.entity->rq->sched)); + to_amdgpu_ring(__entry->sched)); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce97..83adf81defc711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 46d9fb433ab2a3..42f2bfb30af184 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -105,13 
+105,13 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { + struct drm_gpu_scheduler *sched = + container_of(p->vm->delayed.rq, typeof(*sched), rq); + struct amdgpu_ring *ring = + container_of(sched, struct amdgpu_ring, sched); struct amdgpu_ib *ib = p->job->ibs; - struct amdgpu_ring *ring; struct dma_fence *f; - ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, - sched); - WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 6da8994e0469af..2d7f82e98df92c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -24,6 +24,7 @@ #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -907,6 +908,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) struct ttm_resource_manager *man = &mgr->manager; int err; + man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size); + if (IS_ERR(man->cg)) + return PTR_ERR(man->cg); ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 23b6f7a4aa4a11..ab132dae818371 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -420,15 +420,15 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev, void amdgpu_xcp_release_sched(struct amdgpu_device *adev, struct amdgpu_ctx_entity *entity) { - struct drm_gpu_scheduler *sched; - struct amdgpu_ring *ring; + struct drm_gpu_scheduler *sched = + container_of(entity->entity.rq, typeof(*sched), rq); if (!adev->xcp_mgr) return; - sched = entity->entity.rq->sched; if (drm_sched_wqueue_ready(sched)) { - ring = to_amdgpu_ring(entity->entity.rq->sched); + struct amdgpu_ring *ring = 
to_amdgpu_ring(sched); + atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt); } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a63ce747863f10..c68c2e2f4d61aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4800,7 +4800,7 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && adev->gfx.pfp_fw_version >= 158 && - adev->gfx.mec_fw_version >= 152) { + adev->gfx.mec_fw_version >= 151) { adev->gfx.enable_cleaner_shader = true; r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); if (r) { @@ -6114,7 +6114,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -6192,7 +6192,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -6269,7 +6269,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6644,7 +6644,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); diff 
--git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 5255378af53c0a..f67569ccf9f609 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = { 0xd70f6a01, 0x000202ff, 0x00000400, 0x80828102, 0xbf84fff7, 0xbefc03ff, - 0x00000068, 0xbe803080, - 0xbe813080, 0xbe823080, - 0xbe833080, 0x80fc847c, + 0x00000068, 0xbe803000, + 0xbe813000, 0xbe823000, + 0xbe833000, 0x80fc847c, 0xbf84fffa, 0xbeea0480, 0xbeec0480, 0xbeee0480, 0xbef00480, 0xbef20480, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm index 9ba3359253c95d..54f7ed9e2801c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -40,7 +40,6 @@ shader main type(CS) wave_size(32) // Note: original source code from SQ team - // // Create 32 waves in a threadgroup (CS waves) // Each allocates 64 VGPRs @@ -71,8 +70,8 @@ label_0005: s_sub_u32 s2, s2, 8 s_cbranch_scc0 label_0005 // - s_mov_b32 s2, 0x80000000 // Bit31 is first_wave - s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup // CLEAR LDS // @@ -99,10 +98,10 @@ label_001F: label_0023: s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) label_sgpr_loop: - s_movreld_b32 s0, 0 - s_movreld_b32 s1, 0 - s_movreld_b32 s2, 0 - s_movreld_b32 s3, 0 + s_movreld_b32 s0, s0 + s_movreld_b32 s1, s0 + s_movreld_b32 s2, s0 + s_movreld_b32 s3, s0 s_sub_u32 m0, m0, 4 s_cbranch_scc0 
label_sgpr_loop diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d57db42f953600..2a5c2a1ae3c74f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -2428,7 +2428,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -2472,7 +2472,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -2517,7 +2517,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) } if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); @@ -3153,7 +3153,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -3371,7 +3371,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -4541,7 +4541,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return 
r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index e7b58e47029271..62a257a4a3e9b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2324,7 +2324,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); @@ -2468,7 +2468,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); if (amdgpu_emu_mode == 1) - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); @@ -3426,7 +3426,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev) if (r) return r; - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fcfc..809b3a882d0d72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -969,7 +969,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84eb..fec9a007533acc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; + /* The mall_size is already calculated as mall_size_per_umc * num_umc. + * However, for gfx1151, which features a 2-to-1 UMC mapping, + * the result must be multiplied by 2 to determine the actual mall size. 
+ */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 5, 1): + adev->gmc.mall_size *= 2; + break; + default: + break; + } + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -899,7 +911,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d96a..c6f290704d4731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -297,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, return; /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal @@ -881,7 +881,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev) return r; /* Flush HDP after it is initialized */ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 
false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 783e0c3b86b4c4..5effe8327d29fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2435,7 +2435,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block) adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38e6..cbbeadeb53f72d 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. 
+ */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c07974808..086a647308df07 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bcc2..40940b4ab4007b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. 
+ */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29b5..6ccd31c8bc6928 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd117d..2c9239a22f3986 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. 
+ */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index e65916ada23b32..ef9538fbbf5371 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -894,6 +894,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev) { int pipe; + /* return early if we have already fetched these */ + if (adev->mes.sched_version && adev->mes.kiq_version) + return; + /* get MES scheduler/KIQ versions */ mutex_lock(&adev->srbm_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 183dd3346da576..e6ab617b9a4041 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1392,17 +1392,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, mes_v12_0_queue_init_register(ring); } - /* get MES scheduler/KIQ versions */ - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, 3, pipe, 0, 0); + if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) || + ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) { + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); - if (pipe == AMDGPU_MES_SCHED_PIPE) - adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) - adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + soc21_grbm_select(adev, 0, 0, 0, 
0); + mutex_unlock(&adev->srbm_mutex); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 2ece3ae75ec125..bed5ef4d878892 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -360,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_BIF_LS; } -#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) +#define MMIO_REG_HOLE_OFFSET 0x44000 static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bb5dfc410a667f..215543575f477c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -533,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index cc621064610f1d..afdf8ce3b4c59e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -610,7 +610,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 7c49c3f3c3881e..256288c6cd78ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -498,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->hdp.funcs->flush_hdp(adev, NULL); + 
amdgpu_device_flush_hdp(adev, NULL); vfree(buf); drm_dev_exit(idx); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9cb3..b8d835c9e17eda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd686..3eec1b8feaeea4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f0a..0b19f0ab4480da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20b6..1f777c125b00de 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ 
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69dd..012f6ea928ec66 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2ee..f11df9c2ec1318 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -1022,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL); + return 0; } @@ -1204,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + /* Keeping one read-back to ensure all register writes are done, otherwise + * it may introduce race conditions */ + RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 
d99d05f42f1d93..b90da3d3e1406b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 581d8629b9d956..e0e84ef7f56866 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -502,6 +502,52 @@ static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { } +/** + * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode + * + * @vinst: VCN instance + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) +{ + struct amdgpu_device *adev = vinst->adev; + uint32_t reg_data = 0; + int vcn_inst; + + vcn_inst = GET_INST(VCN, vinst->inst); + + /* pause/unpause if state is changed */ + if (vinst->pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n", + vinst->pause_state.fw_based, new_state->fw_based, + new_state->fw_based ? 
"VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE"); + reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } else { + /* unpause DPG, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data); + } + vinst->pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + + /** * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode * @@ -518,6 +564,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE}; int vcn_inst; uint32_t tmp; @@ -582,6 +629,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* Pause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); @@ -775,9 +825,13 @@ static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; vcn_inst = GET_INST(VCN, inst_idx); + /* Unpause dpg */ + vcn_v5_0_1_pause_dpg_mode(vinst, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c 
b/drivers/gpu/drm/amd/amdgpu/vi.c index 86d8bc10d90ab2..9b3510e5311275 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, + { + .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, + .max_width = 4096, + .max_height = 4096, + .max_pixels_per_frame = 4096 * 4096, + .max_level = 0, + }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e477d7509646aa..9bbee484d57cc4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1983,9 +1983,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (kfd_dbg_has_ttmps_always_setup(dev->gpu)) dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; - if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) { if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4)) @@ -2001,7 +1998,11 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; - dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; + if (!amdgpu_sriov_vf(dev->gpu->adev)) + dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; + + if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/drivers/gpu/drm/amd/display/Kconfig 
b/drivers/gpu/drm/amd/display/Kconfig index abd3b6564373a4..46937e6fa78d43 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -56,4 +56,10 @@ config DRM_AMD_SECURE_DISPLAY This option enables the calculation of crc of specific region via debugfs. Cooperate with specific DMCU FW. +config AMD_PRIVATE_COLOR + bool "Enable KMS color management by AMD for AMD" + default n + help + This option extends the KMS color management API with AMD driver-specific properties to enhance the color management support on AMD Steam Deck. + endmenu diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d0d8ad5368c3f3..37b14f31cc4679 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { + if (new_state->stream->adjust.timing_adjust_pending) + return true; if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED) return true; else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state)) @@ -1722,6 +1724,13 @@ static const struct dmi_system_id dmi_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 645 14 inch G11 Notebook PC"), + }, + }, { .callback = edp0_on_dp1_callback, .matches = { @@ -1729,6 +1738,20 @@ static const struct dmi_system_id dmi_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 445 14 inch G11 Notebook PC"), + }, + }, + { + .callback = 
edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP ProBook 465 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; @@ -1899,26 +1922,6 @@ static enum dmub_ips_disable_type dm_get_default_ips_mode( switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(3, 5, 0): case IP_VERSION(3, 6, 0): - /* - * On DCN35 systems with Z8 enabled, it's possible for IPS2 + Z8 to - * cause a hard hang. A fix exists for newer PMFW. - * - * As a workaround, for non-fixed PMFW, force IPS1+RCG as the deepest - * IPS state in all cases, except for s0ix and all displays off (DPMS), - * where IPS2 is allowed. - * - * When checking pmfw version, use the major and minor only. - */ - if ((adev->pm.fw_version & 0x00FFFF00) < 0x005D6300) - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - else if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(11, 5, 0)) - /* - * Other ASICs with DCN35 that have residency issues with - * IPS2 in idle. - * We want them to use IPS2 only in display off cases. 
- */ - ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; - break; case IP_VERSION(3, 5, 1): ret = DMUB_IPS_RCG_IN_ACTIVE_IPS2_IN_OFF; break; @@ -3334,16 +3337,16 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, for (k = 0; k < dc_state->stream_count; k++) { bundle->stream_update.stream = dc_state->streams[k]; - for (m = 0; m < dc_state->stream_status->plane_count; m++) { + for (m = 0; m < dc_state->stream_status[k].plane_count; m++) { bundle->surface_updates[m].surface = - dc_state->stream_status->plane_states[m]; + dc_state->stream_status[k].plane_states[m]; bundle->surface_updates[m].surface->force_full_update = true; } update_planes_and_stream_adapter(dm->dc, UPDATE_TYPE_FULL, - dc_state->stream_status->plane_count, + dc_state->stream_status[k].plane_count, dc_state->streams[k], &bundle->stream_update, bundle->surface_updates); @@ -3460,11 +3463,6 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) return 0; } - - /* leave display off for S4 sequence */ - if (adev->in_s4) - return 0; - /* Recreate dc_state - DC invalidates it when setting power state to S3. 
*/ dc_state_release(dm_state->context); dm_state->context = dc_state_create(dm->dc, NULL); @@ -4722,7 +4720,7 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return r; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR if (amdgpu_dm_create_color_properties(adev)) { dc_state_release(state->context); kfree(state); @@ -6500,12 +6498,12 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, const struct drm_display_mode *native_mode, bool scale_enabled) { - if (scale_enabled) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); - } else if (native_mode->clock == drm_mode->clock && - native_mode->htotal == drm_mode->htotal && - native_mode->vtotal == drm_mode->vtotal) { - copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); + if (scale_enabled || ( + native_mode->clock == drm_mode->clock && + native_mode->htotal == drm_mode->htotal && + native_mode->vtotal == drm_mode->vtotal)) { + if (native_mode->crtc_clock) + copy_crtc_timing_for_drm_display_mode(native_mode, drm_mode); } else { /* no scaling nor amdgpu inserted, no need to patch */ } @@ -11022,6 +11020,9 @@ static bool should_reset_plane(struct drm_atomic_state *state, state->allow_modeset) return true; + if (amdgpu_in_reset(adev) && state->allow_modeset) + return true; + /* Exit early if we know that we're adding or removing the plane. */ if (old_plane_state->crtc != new_plane_state->crtc) return true; @@ -12739,7 +12740,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. 
*/ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); @@ -12748,22 +12749,15 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + /*write req may receive a byte indicating partially written number as well*/ + if (p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index ebabfe3a512f49..4d3ebcaacca1ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -97,7 +97,7 @@ static inline struct fixed31_32 amdgpu_dm_fixpt_from_s3132(__u64 x) return val; } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /* Pre-defined Transfer Functions (TF) * * AMD driver supports pre-defined mathematical functions for transferring diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 36a830a7440f10..b4aafe6103bc47 100644 --- 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -113,6 +113,7 @@ bool amdgpu_dm_crtc_vrr_active(const struct dm_crtc_state *dm_state) * * Panel Replay and PSR SU * - Enable when: + * - VRR is disabled * - vblank counter is disabled * - entry is allowed: usermode demonstrates an adequate number of fast * commits) @@ -131,19 +132,20 @@ static void amdgpu_dm_crtc_set_panel_sr_feature( bool is_sr_active = (link->replay_settings.replay_allow_active || link->psr_settings.psr_allow_active); bool is_crc_window_active = false; + bool vrr_active = amdgpu_dm_crtc_vrr_active_irq(vblank_work->acrtc); #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY is_crc_window_active = amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base); #endif - if (link->replay_settings.replay_feature_enabled && + if (link->replay_settings.replay_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { amdgpu_dm_replay_enable(vblank_work->stream, true); } else if (vblank_enabled) { if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && is_sr_active) amdgpu_dm_psr_disable(vblank_work->stream, false); - } else if (link->psr_settings.psr_feature_enabled && + } else if (link->psr_settings.psr_feature_enabled && !vrr_active && allow_sr_entry && !is_sr_active && !is_crc_window_active) { struct amdgpu_dm_connector *aconn = @@ -244,6 +246,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -271,8 +275,15 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->acrtc->dm_irq_params.allow_sr_entry); } - if (dm->active_vblank_irq_count == 0) + if (dm->active_vblank_irq_count == 0) { + r = 
amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); dc_allow_idle_optimizations(dm->dc, true); + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); + } mutex_unlock(&dm->dc_lock); @@ -470,7 +481,7 @@ static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) } #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR /** * dm_crtc_additional_color_mgmt - enable additional color properties * @crtc: DRM CRTC @@ -552,7 +563,7 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { #if defined(CONFIG_DEBUG_FS) .late_register = amdgpu_dm_crtc_late_register, #endif -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = amdgpu_dm_atomic_crtc_set_property, .atomic_get_property = amdgpu_dm_atomic_crtc_get_property, #endif @@ -731,7 +742,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_crtc_additional_color_mgmt(&acrtc->base); #endif return 0; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 5198a079b46343..8f22ad9665430a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -173,6 +173,9 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = aconnector->base.index; guard(mutex)(&hdcp_w->mutex); + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -220,7 +223,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, unsigned int conn_index = 
aconnector->base.index; guard(mutex)(&hdcp_w->mutex); - hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and * we'd expect the Content Protection (CP) property changed back to @@ -236,7 +238,10 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, } mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); - + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } process_output(hdcp_w); } @@ -254,6 +259,10 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { hdcp_w->encryption_status[conn_index] = MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF; + if (hdcp_w->aconnector[conn_index]) { + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = NULL; + } } process_output(hdcp_w); @@ -488,6 +497,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) struct hdcp_workqueue *hdcp_work = handle; struct amdgpu_dm_connector *aconnector = config->dm_stream_ctx; int link_index = aconnector->dc_link->link_index; + unsigned int conn_index = aconnector->base.index; struct mod_hdcp_display *display = &hdcp_work[link_index].display; struct mod_hdcp_link *link = &hdcp_work[link_index].link; struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; @@ -544,7 +554,10 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); - + drm_connector_get(&aconnector->base); + if (hdcp_w->aconnector[conn_index]) + drm_connector_put(&hdcp_w->aconnector[conn_index]->base); + hdcp_w->aconnector[conn_index] = aconnector; process_output(hdcp_w); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 2cd35392e2da7e..1395a748d726c4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -918,7 +918,7 @@ dm_helpers_probe_acpi_edid(void *data, u8 *buf, unsigned int block, size_t len) { struct drm_connector *connector = data; struct acpi_device *acpidev = ACPI_COMPANION(connector->dev->dev); - unsigned char start = block * EDID_LENGTH; + unsigned short start = block * EDID_LENGTH; struct edid *edid; int r; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23fe..5cdbc86ef8f5a9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -59,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum aux_return_code_type operation_result; struct amdgpu_device *adev; struct ddc_service *ddc; + uint8_t copy[16]; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, (msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0; payload.defer_delay = 0; + if (payload.write) { + memcpy(copy, msg->buffer, msg->size); + payload.data = copy; + } + result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); @@ -87,15 +96,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - 
result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes*/ + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = payload.data[0]; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +133,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 3e0f45f1711c1b..f645cb7831a0ae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1601,7 +1601,7 @@ static void amdgpu_dm_plane_drm_plane_destroy_state(struct drm_plane *plane, drm_atomic_helper_plane_destroy_state(plane, state); } -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR static void dm_atomic_plane_attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) @@ -1792,7 +1792,7 @@ static const struct drm_plane_funcs dm_plane_funcs = { .atomic_duplicate_state = amdgpu_dm_plane_drm_plane_duplicate_state, .atomic_destroy_state = amdgpu_dm_plane_drm_plane_destroy_state, .format_mod_supported = amdgpu_dm_plane_format_mod_supported, -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR .atomic_set_property = dm_atomic_plane_set_property, .atomic_get_property = dm_atomic_plane_get_property, #endif @@ -1888,7 +1888,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, else drm_plane_helper_add(plane, 
&dm_plane_helper_funcs); -#ifdef AMD_PRIVATE_COLOR +#ifdef CONFIG_AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); #endif /* Create (reset) the plane state */ diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 88d3f9d7dd556a..452206b5095eb0 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -51,8 +51,6 @@ static inline unsigned long long complete_integer_division_u64( { unsigned long long result; - ASSERT(divisor); - result = div64_u64_rem(dividend, divisor, remainder); return result; @@ -213,9 +211,6 @@ struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) * @note * Good idea to use Newton's method */ - - ASSERT(arg.value); - return dc_fixpt_from_fraction( dc_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 28d1353f403dfb..ba4ce8a63158bb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, * Don't adjust DRR while there's bandwidth optimizations pending to * avoid conflicting with firmware updates. 
*/ - if (dc->ctx->dce_version > DCE_VERSION_MAX) - if (dc->optimized_required || dc->wm_optimized_required) + if (dc->ctx->dce_version > DCE_VERSION_MAX) { + if (dc->optimized_required || dc->wm_optimized_required) { + stream->adjust.timing_adjust_pending = true; return false; + } + } dc_exit_ips_for_hw_access(dc); @@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || - stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max || + stream->adjust.timing_adjust_pending) update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; update->crtc_timing_adjust->timing_adjust_pending = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 0c8ec30ea67268..731fbd4bc600b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm } //TODO : Could be possibly moved to a common helper layer. 
-static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id) +static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id) { int i, j; @@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str return false; for (i = 0; i < context->stream_count; i++) { - for (j = 0; j < context->stream_status[i].plane_count; j++) { - if (context->stream_status[i].plane_states[j] == plane) { - *plane_id = (i << 16) | j; - return true; + if (context->streams[i]->stream_id == stream_id) { + for (j = 0; j < context->stream_status[i].plane_count; j++) { + if (context->stream_status[i].plane_states[j] == plane) { + *plane_id = (i << 16) | j; + return true; + } } } } @@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d return location; } -static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, +static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id, const struct dc_plane_state *plane, const struct dc_state *context) { unsigned int plane_id; int i = 0; int location = -1; - if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) { + if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) { ASSERT(false); return -1; } @@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; } else { for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) { - disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context); + disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, 
context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context); if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; @@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location; - if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) + if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location])) dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true; /* apply forced pstate policy */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index be54f0e696ce28..ed6584535e898e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -2,6 +2,7 @@ // // Copyright 2024 Advanced Micro Devices, Inc. 
+#include #include "dml2_internal_types.h" #include "dml_top.h" @@ -13,11 +14,11 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) { - *dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + *dml_ctx = vzalloc(sizeof(struct dml2_context)); if (!(*dml_ctx)) return false; - (*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL); + (*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance)); if (!((*dml_ctx)->v21.dml_init.dml2_instance)) return false; @@ -27,7 +28,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx) (*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config; (*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config; - (*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL); + (*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming)); if (!((*dml_ctx)->v21.mode_programming.programming)) return false; @@ -86,6 +87,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /* Store configuration options */ (*dml_ctx)->config = *config; + DC_FP_START(); + /*Initialize SOCBB and DCNIP params */ dml21_initialize_soc_bb_params(&(*dml_ctx)->v21.dml_init, config, in_dc); dml21_initialize_ip_params(&(*dml_ctx)->v21.dml_init, config, in_dc); @@ -96,6 +99,8 @@ static void dml21_init(const struct dc *in_dc, struct dml2_context **dml_ctx, co /*Initialize DML21 instance */ dml2_initialize_instance(&(*dml_ctx)->v21.dml_init); + + DC_FP_END(); } bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const struct dml2_configuration_options *config) @@ -111,8 +116,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s void dml21_destroy(struct dml2_context *dml2) { - kfree(dml2->v21.dml_init.dml2_instance); - 
kfree(dml2->v21.mode_programming.programming); + vfree(dml2->v21.dml_init.dml2_instance); + vfree(dml2->v21.mode_programming.programming); } static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state, @@ -229,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -272,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -284,10 +293,11 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml bool out = false; /* Use dml_validate_only for fast_validate path */ - if (fast_validate) { + if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); - } else + else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); + return out; } @@ -426,8 +436,12 @@ void dml21_copy(struct dml2_context *dst_dml_ctx, dst_dml_ctx->v21.mode_programming.programming = dst_dml2_programming; + DC_FP_START(); + /* need to initialize copied instance for internal references to be correct */ dml2_initialize_instance(&dst_dml_ctx->v21.dml_init); + + DC_FP_END(); } bool dml21_create_copy(struct dml2_context **dst_dml_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1b9..ab6baf2698012c 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 939ee0708bd23b..e89571874185ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -24,6 +24,8 @@ * */ +#include 
+ #include "display_mode_core.h" #include "dml2_internal_types.h" #include "dml2_utils.h" @@ -732,17 +734,22 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2 return out; } + DC_FP_START(); + /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml2_validate_only(context); else out = dml2_validate_and_build_resource(in_dc, context); + + DC_FP_END(); + return out; } static inline struct dml2_context *dml2_allocate_memory(void) { - return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL); + return (struct dml2_context *) vzalloc(sizeof(struct dml2_context)); } static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -779,11 +786,15 @@ static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_op break; } + DC_FP_START(); + initialize_dml2_ip_params(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.ip); initialize_dml2_soc_bbox(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc); initialize_dml2_soc_states(*dml2, in_dc, &(*dml2)->v20.dml_core_ctx.soc, &(*dml2)->v20.dml_core_ctx.states); + + DC_FP_END(); } bool dml2_create(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2) @@ -812,7 +823,7 @@ void dml2_destroy(struct dml2_context *dml2) if (dml2->architecture == dml2_architecture_21) dml21_destroy(dml2); - kfree(dml2); + vfree(dml2); } void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2, diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c index 1236e0f9a2560c..712aff7e17f7a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c @@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes( enum dc_cursor_color_format color_format = cursor_attributes->color_format; int cur_rom_en = 0; - // DCN4 
should always do Cursor degamma for Cursor Color modes if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA || color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) { - cur_rom_en = 1; + if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) { + cur_rom_en = 1; + } } REG_UPDATE_3(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 5489f3d431f64d..3af6a3402b8949 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1980,9 +1980,9 @@ void dcn401_program_pipe( dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size); } - if (pipe_ctx->update_flags.raw || - (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) || - pipe_ctx->stream->update_flags.raw) + if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw || + pipe_ctx->plane_state->update_flags.raw || + pipe_ctx->stream->update_flags.raw)) dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context); if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable || diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 268626e73c543d..53c961f86d43c0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) void link_set_all_streams_dpms_off_for_link(struct dc_link *link) { struct pipe_ctx *pipes[MAX_PIPES]; + struct dc_stream_state *streams[MAX_PIPES]; struct dc_state *state = link->dc->current_state; uint8_t count; int i; @@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link) link_get_master_pipes_with_dpms_on(link, state, &count, pipes); + /* The subsequent call to dc_commit_updates_for_stream for a full update + * will release the current state 
and swap to a new state. Releasing the + * current state results in the stream pointers in the pipe_ctx structs + * to be zero'd. Hence, cache all streams prior to dc_commit_updates_for_stream. + */ + for (i = 0; i < count; i++) + streams[i] = pipes[i]->stream; + for (i = 0; i < count; i++) { - stream_update.stream = pipes[i]->stream; + stream_update.stream = streams[i]; dc_commit_updates_for_stream(link->ctx->dc, NULL, 0, - pipes[i]->stream, &stream_update, + streams[i], &stream_update, state); } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 34d2e097ca2e6b..5a5d48fadbf27b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -35,6 +35,17 @@ #define DC_LOGGER \ link->ctx->logger +static void get_default_8b_10b_lttpr_aux_rd_interval( + union training_aux_rd_interval *training_rd_interval) +{ + /* LTTPR are required to program DPCD 0000Eh to 0x4 (16ms) upon AUX + * read reply to this register. Since old sinks with DPCD rev 1.1 + * and earlier may not support this register, assume the mandatory + * value is programmed by the LTTPR to avoid AUX timeout issues. 
+ */ + training_rd_interval->raw = 0x4; +} + static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, const struct dc_link_settings *link_settings, enum lttpr_mode lttpr_mode) @@ -43,17 +54,22 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, uint32_t wait_in_micro_secs = 100; memset(&training_rd_interval, 0, sizeof(training_rd_interval)); - if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); - if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) - wait_in_micro_secs = 400; - if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) - wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); + + if (training_rd_interval.raw != 0) { + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; + if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) + wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; + } } return wait_in_micro_secs; } @@ -71,13 +87,15 @@ static uint32_t get_eq_training_aux_rd_interval( DP_128B132B_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); - } else if (link_dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && - link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) { - core_link_read_dpcd( - link, - DP_TRAINING_AUX_RD_INTERVAL, - (uint8_t *)&training_rd_interval, - sizeof(training_rd_interval)); + } else if (link_dp_get_encoding_format(link_settings) == 
DP_8b_10b_ENCODING) { + if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_12) + core_link_read_dpcd( + link, + DP_TRAINING_AUX_RD_INTERVAL, + (uint8_t *)&training_rd_interval, + sizeof(training_rd_interval)); + else if (dp_is_lttpr_present(link)) + get_default_8b_10b_lttpr_aux_rd_interval(&training_rd_interval); } switch (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c91..944650cb13ded7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2114,8 +2114,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2501,14 +2499,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; diff --git a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c index 52d97918a3bd21..ebf0287417e0eb 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/sspl/spl_fixpt31_32.c @@ -29,8 +29,6 @@ static inline unsigned long long spl_complete_integer_division_u64( { unsigned long long result; - SPL_ASSERT(divisor); - result = spl_div64_u64_rem(dividend, divisor, remainder); return result; @@ -196,8 +194,6 @@ struct spl_fixed31_32 spl_fixpt_recip(struct spl_fixed31_32 arg) * Good idea to use Newton's method */ - SPL_ASSERT(arg.value); - return spl_fixpt_from_fraction( spl_fixpt_one.value, arg.value); diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h 
b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2a9606118d8994..21dc956b5f35d4 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -429,6 +429,7 @@ struct amd_pm_funcs { int (*set_pp_table)(void *handle, const char *buf, size_t size); void (*debugfs_print_current_performance_level)(void *handle, struct seq_file *m); int (*switch_power_profile)(void *handle, enum PP_SMC_POWER_PROFILE type, bool en); + int (*pause_power_profile)(void *handle, bool pause); /* export to amdgpu */ struct amd_vce_state *(*get_vce_clock_state)(void *handle, u32 idx); int (*dispatch_tasks)(void *handle, enum amd_pp_task task_id, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 81e9b443ca0adc..3533d43ed1e73d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -349,6 +349,25 @@ int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause) +{ + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + int ret = 0; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (pp_funcs && pp_funcs->pause_power_profile) { + mutex_lock(&adev->pm.mutex); + ret = pp_funcs->pause_power_profile( + adev->powerplay.pp_handle, pause); + mutex_unlock(&adev->pm.mutex); + } + + return ret; +} + int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, uint32_t pstate) { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 922def51685b0a..da76611edb107d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3055,6 +3055,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, struct device_attribute *attr, char *buf) { + if (amdgpu_ignore_min_pcap) + return sysfs_emit(buf, "%i\n", 0); + return amdgpu_hwmon_show_power_cap_generic(dev, attr, buf, PP_PWR_LIMIT_MIN); } diff 
--git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f93d287dbf1376..4c0f7ad1481661 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -410,6 +410,8 @@ int amdgpu_dpm_set_xgmi_pstate(struct amdgpu_device *adev, int amdgpu_dpm_switch_power_profile(struct amdgpu_device *adev, enum PP_SMC_POWER_PROFILE type, bool en); +int amdgpu_dpm_pause_power_profile(struct amdgpu_device *adev, + bool pause); int amdgpu_dpm_baco_reset(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index 4bd92fd782be6a..8d40ed0f0e8383 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -143,6 +143,10 @@ int atomctrl_initialize_mc_reg_table( vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); @@ -180,6 +184,10 @@ int atomctrl_initialize_mc_reg_table_v2_2( vram_info = (ATOM_VRAM_INFO_HEADER_V2_2 *) smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); + if (!vram_info) { + pr_err("Could not retrieve the VramInfo table!"); + return -EINVAL; + } if (module_index >= vram_info->ucNumOfVRAMModule) { pr_err("Invalid VramInfo table."); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 033c3229b555f0..e3adb7aea969d0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2398,7 +2398,11 @@ static int smu_switch_power_profile(void *handle, smu_power_profile_mode_get(smu, type); else smu_power_profile_mode_put(smu, type); - ret = 
smu_bump_power_profile_mode(smu, NULL, 0); + /* don't switch the active workload when paused */ + if (smu->pause_workload) + ret = 0; + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); if (ret) { if (enable) smu_power_profile_mode_put(smu, type); @@ -2411,6 +2415,35 @@ static int smu_switch_power_profile(void *handle, return 0; } +static int smu_pause_power_profile(void *handle, + bool pause) +{ + struct smu_context *smu = handle; + struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); + u32 workload_mask = 1 << PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + int ret; + + if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) + return -EOPNOTSUPP; + + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + smu->pause_workload = pause; + + /* force to bootup default profile */ + if (smu->pause_workload && smu->ppt_funcs->set_power_profile_mode) + ret = smu->ppt_funcs->set_power_profile_mode(smu, + workload_mask, + NULL, + 0); + else + ret = smu_bump_power_profile_mode(smu, NULL, 0); + return ret; + } + + return 0; +} + static enum amd_dpm_forced_level smu_get_performance_level(void *handle) { struct smu_context *smu = handle; @@ -2821,7 +2854,10 @@ int smu_get_power_limit(void *handle, *limit = smu->max_power_limit; break; case SMU_PPT_LIMIT_MIN: - *limit = smu->min_power_limit; + if (amdgpu_ignore_min_pcap) + *limit = 0; + else + *limit = smu->min_power_limit; break; default: return -EINVAL; @@ -2845,7 +2881,14 @@ static int smu_set_power_limit(void *handle, uint32_t limit) if (smu->ppt_funcs->set_power_limit) return smu->ppt_funcs->set_power_limit(smu, limit_type, limit); - if ((limit > smu->max_power_limit) || (limit < smu->min_power_limit)) { + if (amdgpu_ignore_min_pcap) { + if ((limit > smu->max_power_limit)) { + dev_err(smu->adev->dev, + "New power limit (%d) is over the max allowed %d\n", + limit, smu->max_power_limit); + return -EINVAL; + } + } else if ((limit > smu->max_power_limit) || 
(limit < smu->min_power_limit)) { dev_err(smu->adev->dev, "New power limit (%d) is out of range [%d,%d]\n", limit, smu->min_power_limit, smu->max_power_limit); @@ -3733,6 +3776,7 @@ static const struct amd_pm_funcs swsmu_pm_funcs = { .get_pp_table = smu_sys_get_pp_table, .set_pp_table = smu_sys_set_pp_table, .switch_power_profile = smu_switch_power_profile, + .pause_power_profile = smu_pause_power_profile, /* export to amdgpu */ .dispatch_tasks = smu_handle_dpm_task, .load_firmware = smu_load_microcode, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3ba169639f5460..dd6d0e7aa2425d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -558,6 +558,7 @@ struct smu_context { /* asic agnostic workload mask */ uint32_t workload_mask; + bool pause_workload; /* default/user workload preference */ uint32_t power_profile_mode; uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 78391d8f35a9cb..25fabf336a6401 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1204,7 +1204,7 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; - if (speed == 0) + if (!speed || speed > UINT_MAX/8) return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 071168aa0c3bda..5222b1e9f533d0 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1597,10 +1597,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) } dp->reg_base = devm_platform_ioremap_resource(pdev, 0); - if 
(IS_ERR(dp->reg_base)) { - ret = PTR_ERR(dp->reg_base); - goto err_disable_clk; - } + if (IS_ERR(dp->reg_base)) + return ERR_CAST(dp->reg_base); dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); @@ -1612,8 +1610,7 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (IS_ERR(dp->hpd_gpiod)) { dev_err(dev, "error getting HDP GPIO: %ld\n", PTR_ERR(dp->hpd_gpiod)); - ret = PTR_ERR(dp->hpd_gpiod); - goto err_disable_clk; + return ERR_CAST(dp->hpd_gpiod); } if (dp->hpd_gpiod) { @@ -1633,8 +1630,7 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (dp->irq == -ENXIO) { dev_err(&pdev->dev, "failed to get irq\n"); - ret = -ENODEV; - goto err_disable_clk; + return ERR_PTR(-ENODEV); } ret = devm_request_threaded_irq(&pdev->dev, dp->irq, @@ -1643,15 +1639,22 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - goto err_disable_clk; + return ERR_PTR(ret); } disable_irq(dp->irq); - return dp; + dp->aux.name = "DP-AUX"; + dp->aux.transfer = analogix_dpaux_transfer; + dp->aux.dev = dp->dev; + drm_dp_aux_init(&dp->aux); -err_disable_clk: - clk_disable_unprepare(dp->clock); - return ERR_PTR(ret); + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); + ret = devm_pm_runtime_enable(dp->dev); + if (ret) + return ERR_PTR(ret); + + return dp; } EXPORT_SYMBOL_GPL(analogix_dp_probe); @@ -1696,25 +1699,12 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) dp->drm_dev = drm_dev; dp->encoder = dp->plat_data->encoder; - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_use_autosuspend(dp->dev); - pm_runtime_set_autosuspend_delay(dp->dev, 100); - pm_runtime_enable(dp->dev); - } else { - ret = analogix_dp_resume(dp); - if (ret) - return ret; - } - - dp->aux.name = "DP-AUX"; - dp->aux.transfer = analogix_dpaux_transfer; - dp->aux.dev = 
dp->dev; dp->aux.drm_dev = drm_dev; ret = drm_dp_aux_register(&dp->aux); if (ret) { DRM_ERROR("failed to register AUX (%d)\n", ret); - goto err_disable_pm_runtime; + return ret; } ret = analogix_dp_create_bridge(drm_dev, dp); @@ -1727,13 +1717,6 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) err_unregister_aux: drm_dp_aux_unregister(&dp->aux); -err_disable_pm_runtime: - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } return ret; } @@ -1750,13 +1733,6 @@ void analogix_dp_unbind(struct analogix_dp_device *dp) } drm_dp_aux_unregister(&dp->aux); - - if (IS_ENABLED(CONFIG_PM)) { - pm_runtime_dont_use_autosuspend(dp->dev); - pm_runtime_disable(dp->dev); - } else { - analogix_dp_suspend(dp); - } } EXPORT_SYMBOL_GPL(analogix_dp_unbind); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index f4c3ff1fdc6923..f6e714feeea54c 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -880,7 +880,11 @@ static int lt9611uxc_probe(struct i2c_client *client) } } - return lt9611uxc_audio_init(dev, lt9611uxc); + ret = lt9611uxc_audio_init(dev, lt9611uxc); + if (ret) + goto err_remove_bridge; + + return 0; err_remove_bridge: free_irq(client->irq, lt9611uxc); diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index f04aabe8327c6b..b06b9e7d3d09bf 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -143,11 +143,11 @@ stages: # Pre-merge pipeline - if: &is-pre-merge $CI_PIPELINE_SOURCE == "merge_request_event" # Push to a branch on a fork - - if: &is-fork-push $CI_PROJECT_NAMESPACE != "mesa" && $CI_PIPELINE_SOURCE == "push" + - if: &is-fork-push $CI_PIPELINE_SOURCE == "push" # nightly pipeline - if: &is-scheduled-pipeline $CI_PIPELINE_SOURCE == "schedule" # pipeline for direct pushes that 
bypassed the CI - - if: &is-direct-push $CI_PROJECT_NAMESPACE == "mesa" && $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" + - if: &is-direct-push $CI_PIPELINE_SOURCE == "push" && $GITLAB_USER_LOGIN != "marge-bot" # Rules applied to every job in the pipeline @@ -170,26 +170,15 @@ stages: - !reference [.disable-farm-mr-rules, rules] # Never run immediately after merging, as we just ran everything - !reference [.never-post-merge-rules, rules] - # Build everything in merge pipelines, if any files affecting the pipeline - # were changed + # Build everything in merge pipelines - if: *is-merge-attempt - changes: &all_paths - - drivers/gpu/drm/ci/**/* when: on_success # Same as above, but for pre-merge pipelines - if: *is-pre-merge - changes: - *all_paths when: manual - # Skip everything for pre-merge and merge pipelines which don't change - # anything in the build - - if: *is-merge-attempt - when: never - - if: *is-pre-merge - when: never # Build everything after someone bypassed the CI - if: *is-direct-push - when: on_success + when: manual # Build everything in scheduled pipelines - if: *is-scheduled-pipeline when: on_success diff --git a/drivers/gpu/drm/display/drm_hdmi_audio_helper.c b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c index 05afc9f0bdd6b6..ae8a0cf595fc6f 100644 --- a/drivers/gpu/drm/display/drm_hdmi_audio_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_audio_helper.c @@ -103,7 +103,8 @@ static int drm_connector_hdmi_audio_hook_plugged_cb(struct device *dev, connector->hdmi_audio.plugged_cb = fn; connector->hdmi_audio.plugged_cb_dev = codec_dev; - fn(codec_dev, connector->hdmi_audio.last_state); + if (fn) + fn(codec_dev, connector->hdmi_audio.last_state); mutex_unlock(&connector->hdmi_audio.lock); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 17fc5dc708f472..60e5ac179c151a 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -549,7 +549,7 @@ int drm_dev_wedged_event(struct drm_device 
*dev, unsigned long method) if (drm_WARN_ONCE(dev, !recovery, "invalid recovery method %u\n", opt)) break; - len += scnprintf(event_string + len, sizeof(event_string), "%s,", recovery); + len += scnprintf(event_string + len, sizeof(event_string) - len, "%s,", recovery); } if (recovery) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 13bc4c290b17d5..9edb3247c767b8 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -6596,6 +6596,7 @@ static void drm_reset_display_info(struct drm_connector *connector) info->has_hdmi_infoframe = false; info->rgb_quant_range_selectable = false; memset(&info->hdmi, 0, sizeof(info->hdmi)); + memset(&connector->hdr_sink_metadata, 0, sizeof(connector->hdr_sink_metadata)); info->edid_hdmi_rgb444_dc_modes = 0; info->edid_hdmi_ycbcr444_dc_modes = 0; diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index c299cd94d3f78f..cf2463090d3acc 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -964,6 +964,10 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) struct drm_file *file = f->private_data; struct drm_device *dev = file->minor->dev; struct drm_printer p = drm_seq_file_printer(m); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; drm_printf(&p, "drm-driver:\t%s\n", dev->driver->name); drm_printf(&p, "drm-client-id:\t%llu\n", file->client_id); @@ -983,6 +987,8 @@ void drm_show_fdinfo(struct seq_file *m, struct file *f) if (dev->driver->show_fdinfo) dev->driver->show_fdinfo(&p, file); + + drm_dev_exit(idx); } EXPORT_SYMBOL(drm_show_fdinfo); diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e783..4b2f32889f00f8 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, lockdep_assert_held(&gpusvm->notifier_lock); if (range->flags.has_dma_mapping) { + struct drm_gpusvm_range_flags flags = { + 
.__flags = range->flags.__flags, + }; + for (i = 0, j = 0; i < npages; j++) { struct drm_pagemap_device_addr *addr = &range->dma_addr[j]; @@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, dev, *addr); i += 1 << addr->order; } - range->flags.has_devmem_pages = false; - range->flags.has_dma_mapping = false; + + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + flags.has_devmem_pages = false; + flags.has_dma_mapping = false; + WRITE_ONCE(range->flags.__flags, flags.__flags); + range->dpagemap = NULL; } } @@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, int err = 0; struct dev_pagemap *pagemap; struct drm_pagemap *dpagemap; + struct drm_gpusvm_range_flags flags; retry: hmm_range.notifier_seq = mmu_interval_read_begin(notifier); @@ -1378,7 +1387,8 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, */ drm_gpusvm_notifier_lock(gpusvm); - if (range->flags.unmapped) { + flags.__flags = range->flags.__flags; + if (flags.unmapped) { drm_gpusvm_notifier_unlock(gpusvm); err = -EFAULT; goto err_free; @@ -1454,6 +1464,11 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, goto err_unmap; } + if (ctx->devmem_only) { + err = -EFAULT; + goto err_unmap; + } + addr = dma_map_page(gpusvm->drm->dev, page, 0, PAGE_SIZE << order, @@ -1469,14 +1484,17 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } i += 1 << order; num_dma_mapped = i; + flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { - range->flags.has_devmem_pages = true; + flags.has_devmem_pages = true; range->dpagemap = dpagemap; } + /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */ + WRITE_ONCE(range->flags.__flags, flags.__flags); + drm_gpusvm_notifier_unlock(gpusvm); kvfree(pfns); set_seqno: @@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, goto err_finalize; /* Upon success bind devmem allocation to range and zdd */ + 
devmem_allocation->timeslice_expiration = get_jiffies_64() + + msecs_to_jiffies(ctx->timeslice_ms); zdd->devmem_allocation = devmem_allocation; /* Owns ref */ err_finalize: @@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, void *buf; int i, err = 0; + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + start = ALIGN_DOWN(fault_addr, size); end = ALIGN(fault_addr + 1, size); diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 89e05a5bed1de8..a4cd476f9b3026 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -404,12 +404,16 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) u16 height = drm->mode_config.min_height; u16 width = drm->mode_config.min_width; struct mipi_dbi *dbi = &dbidev->dbi; - size_t len = width * height * 2; + const struct drm_format_info *dst_format; + size_t len; int idx; if (!drm_dev_enter(drm, &idx)) return; + dst_format = drm_format_info(dbidev->pixel_format); + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + memset(dbidev->tx_buf, 0, len); mipi_dbi_set_window_address(dbidev, 0, width - 1, 0, height - 1); diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index f2a99681b99858..de2ddf5dbbd3f1 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -366,8 +366,48 @@ impl Segment<'_> { SegmentIterator { segment: self, offset: 0, - carry: 0, - carry_len: 0, + decfifo: Default::default(), + } + } +} + +/// Max fifo size is 17 (max push) + 2 (max remaining) +const MAX_FIFO_SIZE: usize = 19; + +/// A simple Decimal digit FIFO +#[derive(Default)] +struct DecFifo { + decimals: [u8; MAX_FIFO_SIZE], + len: usize, +} + +impl DecFifo { + fn push(&mut self, data: u64, len: usize) { + let mut chunk = data; + for i in (0..self.len).rev() { + self.decimals[i + len] = 
self.decimals[i]; + } + for i in 0..len { + self.decimals[i] = (chunk % 10) as u8; + chunk /= 10; + } + self.len += len; + } + + /// Pop 3 decimal digits from the FIFO + fn pop3(&mut self) -> Option<(u16, usize)> { + if self.len == 0 { + None + } else { + let poplen = 3.min(self.len); + self.len -= poplen; + let mut out = 0; + let mut exp = 1; + for i in 0..poplen { + out += self.decimals[self.len + i] as u16 * exp; + exp *= 10; + } + Some((out, NUM_CHARS_BITS[poplen])) } } } @@ -375,8 +415,7 @@ impl Segment<'_> { struct SegmentIterator<'a> { segment: &'a Segment<'a>, offset: usize, - carry: u64, - carry_len: usize, + decfifo: DecFifo, } impl Iterator for SegmentIterator<'_> { @@ -394,31 +433,17 @@ impl Iterator for SegmentIterator<'_> { } } Segment::Numeric(data) => { - if self.carry_len < 3 && self.offset < data.len() { - // If there are less than 3 decimal digits in the carry, - // take the next 7 bytes of input, and add them to the carry. + if self.decfifo.len < 3 && self.offset < data.len() { + // If there are less than 3 decimal digits in the fifo, + // take the next 7 bytes of input, and push them to the fifo. let mut buf = [0u8; 8]; let len = 7.min(data.len() - self.offset); buf[..len].copy_from_slice(&data[self.offset..self.offset + len]); let chunk = u64::from_le_bytes(buf); - let pow = u64::pow(10, BYTES_TO_DIGITS[len] as u32); - self.carry = chunk + self.carry * pow; + self.decfifo.push(chunk, BYTES_TO_DIGITS[len]); self.offset += len; - self.carry_len += BYTES_TO_DIGITS[len]; - } - match self.carry_len { - 0 => None, - len => { - // take the next 3 decimal digits of the carry - // and return 10bits of numeric data. 
- let out_len = 3.min(len); - self.carry_len -= out_len; - let pow = u64::pow(10, self.carry_len as u32); - let out = (self.carry / pow) as u16; - self.carry = self.carry % pow; - Some((out, NUM_CHARS_BITS[out_len])) - } } + self.decfifo.pop3() } } } diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 5170f72b08309d..f91daefa9d2bc5 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -43,13 +43,13 @@ struct decon_data { unsigned int wincon_burstlen_shift; }; -static struct decon_data exynos7_decon_data = { +static const struct decon_data exynos7_decon_data = { .vidw_buf_start_base = 0x80, .shadowcon_win_protect_shift = 10, .wincon_burstlen_shift = 11, }; -static struct decon_data exynos7870_decon_data = { +static const struct decon_data exynos7870_decon_data = { .vidw_buf_start_base = 0x880, .shadowcon_win_protect_shift = 8, .wincon_burstlen_shift = 10, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index f313ae7bc3a323..6cc7bf77bcacc6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -355,8 +355,7 @@ static void exynos_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static struct platform_driver exynos_drm_platform_driver = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index b150cfd92f6ee9..09e33a26caaff9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -908,7 +908,7 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, u32 buf_num; u32 cfg; - DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueu[%d]\n", buf_id, enqueue); + DRM_DEV_DEBUG_KMS(ctx->dev, "buf_id[%d]enqueue[%d]\n", buf_id, enqueue); 
spin_lock_irqsave(&ctx->lock, flags); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 1ad87584b1c2c8..c394cc702d7d42 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -731,7 +731,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, /* * Setting dma-burst to 16Word causes permanent tearing for very small * buffers, e.g. cursor buffer. Burst Mode switching which based on - * plane size is not recommended as plane size varies alot towards the + * plane size is not recommended as plane size varies a lot towards the * end of the screen and rapid movement causes unstable DMA, but it is * still better to change dma-burst than displaying garbage. */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 08cf79a6202533..e644e2382d77f4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -312,9 +312,6 @@ static int vidi_get_modes(struct drm_connector *connector) else drm_edid = drm_edid_alloc(fake_edid_info, sizeof(fake_edid_info)); - if (!drm_edid) - return 0; - drm_edid_connector_update(connector, drm_edid); count = drm_edid_connector_add_modes(connector); diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 048be287224774..98b898a1de8ffc 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -244,6 +244,7 @@ static int icl_get_qgv_points(struct drm_i915_private *dev_priv, qi->deinterleave = 4; break; case INTEL_DRAM_GDDR: + case INTEL_DRAM_GDDR_ECC: qi->channel_width = 32; break; default: @@ -398,6 +399,12 @@ static const struct intel_sa_info xe2_hpd_sa_info = { /* Other values not used by simplified algorithm */ }; +static const struct intel_sa_info xe2_hpd_ecc_sa_info = { + .derating = 45, + .deprogbwlimit = 53, + /* Other values not used by 
simplified algorithm */ +}; + static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) { struct intel_qgv_info qi = {}; @@ -740,10 +747,15 @@ static unsigned int icl_qgv_bw(struct drm_i915_private *i915, void intel_bw_init_hw(struct drm_i915_private *dev_priv) { + const struct dram_info *dram_info = &dev_priv->dram_info; + if (!HAS_DISPLAY(dev_priv)) return; - if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv)) + if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv) && + dram_info->type == INTEL_DRAM_GDDR_ECC) + xe2_hpd_get_bw_info(dev_priv, &xe2_hpd_ecc_sa_info); + else if (DISPLAY_VERx100(dev_priv) >= 1401 && IS_DGFX(dev_priv)) xe2_hpd_get_bw_info(dev_priv, &xe2_hpd_sa_info); else if (DISPLAY_VER(dev_priv) >= 14) tgl_get_bw_info(dev_priv, &mtl_sa_info); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3afb85fe8536df..3b509c70fb581d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -968,7 +968,9 @@ static bool vrr_params_changed(const struct intel_crtc_state *old_crtc_state, old_crtc_state->vrr.vmin != new_crtc_state->vrr.vmin || old_crtc_state->vrr.vmax != new_crtc_state->vrr.vmax || old_crtc_state->vrr.guardband != new_crtc_state->vrr.guardband || - old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full; + old_crtc_state->vrr.pipeline_full != new_crtc_state->vrr.pipeline_full || + old_crtc_state->vrr.vsync_start != new_crtc_state->vrr.vsync_start || + old_crtc_state->vrr.vsync_end != new_crtc_state->vrr.vsync_end; } static bool cmrr_params_changed(const struct intel_crtc_state *old_crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h index 717286981687a2..7a3bb77c7af7c2 100644 --- a/drivers/gpu/drm/i915/display/intel_display_device.h +++ b/drivers/gpu/drm/i915/display/intel_display_device.h @@ 
-161,6 +161,7 @@ struct intel_display_platforms { #define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13) #define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb) #define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc) +#define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display)) #define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display)) #define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0) #define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index a236b5fc7a3d7b..cd8f728d5fddc4 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -172,10 +172,28 @@ int intel_dp_link_symbol_clock(int rate) static int max_dprx_rate(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + int max_rate; + if (intel_dp_tunnel_bw_alloc_is_enabled(intel_dp)) - return drm_dp_tunnel_max_dprx_rate(intel_dp->tunnel); + max_rate = drm_dp_tunnel_max_dprx_rate(intel_dp->tunnel); + else + max_rate = drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); - return drm_dp_bw_code_to_link_rate(intel_dp->dpcd[DP_MAX_LINK_RATE]); + /* + * Some broken eDP sinks illegally declare support for + * HBR3 without TPS4, and are unable to produce a stable + * output. Reject HBR3 when TPS4 is not available. 
+ */ + if (max_rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", + encoder->base.base.id, encoder->base.name); + max_rate = 540000; + } + + return max_rate; } static int max_dprx_lane_count(struct intel_dp *intel_dp) @@ -1032,10 +1050,11 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector, u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes; /* - * 3 DSC Slices per pipe need 3 DSC engines, - * which is supported only with Ultrajoiner. + * 3 DSC Slices per pipe need 3 DSC engines, which is supported only + * with Ultrajoiner only for some platforms. */ - if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4) + if (valid_dsc_slicecount[i] == 3 && + (!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4)) continue; if (test_slice_count > @@ -2504,6 +2523,7 @@ intel_dp_dsc_compute_pipe_bpp_limits(struct intel_dp *intel_dp, bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, @@ -2557,7 +2577,7 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, intel_dp_test_compute_config(intel_dp, crtc_state, limits); return intel_dp_compute_config_link_bpp_limits(intel_dp, - intel_dp->attached_connector, + connector, crtc_state, dsc, limits); @@ -2618,7 +2638,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, joiner_needs_dsc = intel_dp_joiner_needs_dsc(display, num_joined_pipes); dsc_needed = joiner_needs_dsc || intel_dp->force_dsc_en || - !intel_dp_compute_config_limits(intel_dp, pipe_config, + !intel_dp_compute_config_limits(intel_dp, connector, pipe_config, respect_downstream_limits, false, &limits); @@ -2652,7 +2672,7 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, str_yes_no(ret), str_yes_no(joiner_needs_dsc), str_yes_no(intel_dp->force_dsc_en)); - if 
(!intel_dp_compute_config_limits(intel_dp, pipe_config, + if (!intel_dp_compute_config_limits(intel_dp, connector, pipe_config, respect_downstream_limits, true, &limits)) @@ -4170,6 +4190,9 @@ static void intel_edp_mso_init(struct intel_dp *intel_dp) static void intel_edp_set_sink_rates(struct intel_dp *intel_dp) { + struct intel_display *display = to_intel_display(intel_dp); + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + intel_dp->num_sink_rates = 0; if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { @@ -4180,10 +4203,7 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) sink_rates, sizeof(sink_rates)); for (i = 0; i < ARRAY_SIZE(sink_rates); i++) { - int val = le16_to_cpu(sink_rates[i]); - - if (val == 0) - break; + int rate; /* Value read multiplied by 200kHz gives the per-lane * link rate in kHz. The source rates are, however, @@ -4191,7 +4211,24 @@ intel_edp_set_sink_rates(struct intel_dp *intel_dp) * back to symbols is * (val * 200kHz)*(8/10 ch. encoding)*(1/8 bit to Byte) */ - intel_dp->sink_rates[i] = (val * 200) / 10; + rate = le16_to_cpu(sink_rates[i]) * 200 / 10; + + if (rate == 0) + break; + + /* + * Some broken eDP sinks illegally declare support for + * HBR3 without TPS4, and are unable to produce a stable + * output. Reject HBR3 when TPS4 is not available. 
+ */ + if (rate >= 810000 && !drm_dp_tps4_supported(intel_dp->dpcd)) { + drm_dbg_kms(display->drm, + "[ENCODER:%d:%s] Rejecting HBR3 due to missing TPS4 support\n", + encoder->base.base.id, encoder->base.name); + break; + } + + intel_dp->sink_rates[i] = rate; } intel_dp->num_sink_rates = i; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 9189db4c25946a..98f90955fdb1db 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -194,6 +194,7 @@ void intel_dp_wait_source_oui(struct intel_dp *intel_dp); int intel_dp_output_bpp(enum intel_output_format output_format, int bpp); bool intel_dp_compute_config_limits(struct intel_dp *intel_dp, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool respect_downstream_limits, bool dsc, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 02f95108c63799..fe685f098ba9a2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -242,7 +242,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - bool is_mst = intel_dp->is_mst; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int bpp_x16, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp_x16; @@ -590,12 +590,13 @@ adjust_limits_for_dsc_hblank_expansion_quirk(struct intel_dp *intel_dp, static bool mst_stream_compute_config_limits(struct intel_dp *intel_dp, - const struct intel_connector *connector, + struct intel_connector *connector, struct intel_crtc_state *crtc_state, bool dsc, struct link_config_limits *limits) { - if (!intel_dp_compute_config_limits(intel_dp, crtc_state, false, dsc, + if (!intel_dp_compute_config_limits(intel_dp, connector, + crtc_state, false, dsc, limits)) return false; 
diff --git a/drivers/gpu/drm/i915/display/intel_psr_regs.h b/drivers/gpu/drm/i915/display/intel_psr_regs.h index 795e6b9cc575c8..248136456048e3 100644 --- a/drivers/gpu/drm/i915/display/intel_psr_regs.h +++ b/drivers/gpu/drm/i915/display/intel_psr_regs.h @@ -325,8 +325,8 @@ #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(20, 16) #define PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(12, 8) -#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_FIRST_LFPS_HALF_CYCLE_DURATION_MASK, val) #define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK REG_GENMASK(4, 0) -#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LFPS_HALF_CYCLE_DURATION_MASK, val) +#define PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION(val) REG_FIELD_PREP(PORT_ALPM_LFPS_CTL_LAST_LFPS_HALF_CYCLE_DURATION_MASK, val) #endif /* __INTEL_PSR_REGS_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c index c6321dafef4f3c..74bb3bedf30f5d 100644 --- a/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c +++ b/drivers/gpu/drm/i915/display/intel_snps_hdmi_pll.c @@ -41,12 +41,12 @@ static s64 interp(s64 x, s64 x1, s64 x2, s64 y1, s64 y2) { s64 dydx; - dydx = DIV_ROUND_UP_ULL((y2 - y1) * 100000, (x2 - x1)); + dydx = DIV64_U64_ROUND_UP((y2 - y1) * 100000, (x2 - x1)); - return (y1 + DIV_ROUND_UP_ULL(dydx * (x - x1), 100000)); + return (y1 + DIV64_U64_ROUND_UP(dydx * (x - x1), 100000)); } -static void get_ana_cp_int_prop(u32 vco_clk, +static void get_ana_cp_int_prop(u64 vco_clk, u32 refclk_postscalar, int mpll_ana_v2i, int c, int a, @@ -115,16 +115,16 @@ static 
void get_ana_cp_int_prop(u32 vco_clk, CURVE0_MULTIPLIER)); scaled_interpolated_sqrt = - int_sqrt(DIV_ROUND_UP_ULL(interpolated_product, vco_div_refclk_float) * + int_sqrt(DIV64_U64_ROUND_UP(interpolated_product, vco_div_refclk_float) * DIV_ROUND_DOWN_ULL(1000000000000ULL, 55)); /* Scale vco_div_refclk for ana_cp_int */ scaled_vco_div_refclk2 = DIV_ROUND_UP_ULL(vco_div_refclk_float, 1000000); - adjusted_vco_clk2 = 1460281 * DIV_ROUND_UP_ULL(scaled_interpolated_sqrt * + adjusted_vco_clk2 = 1460281 * DIV64_U64_ROUND_UP(scaled_interpolated_sqrt * scaled_vco_div_refclk2, curve_1_interpolated); - *ana_cp_prop = DIV_ROUND_UP_ULL(adjusted_vco_clk2, curve_2_scaled2); + *ana_cp_prop = DIV64_U64_ROUND_UP(adjusted_vco_clk2, curve_2_scaled2); *ana_cp_prop = max(1, min(*ana_cp_prop, 127)); } @@ -165,10 +165,10 @@ static void compute_hdmi_tmds_pll(u64 pixel_clock, u32 refclk, /* Select appropriate v2i point */ if (datarate <= INTEL_SNPS_PHY_HDMI_9999MHZ) { mpll_ana_v2i = 2; - tx_clk_div = ilog2(DIV_ROUND_DOWN_ULL(INTEL_SNPS_PHY_HDMI_9999MHZ, datarate)); + tx_clk_div = ilog2(div64_u64(INTEL_SNPS_PHY_HDMI_9999MHZ, datarate)); } else { mpll_ana_v2i = 3; - tx_clk_div = ilog2(DIV_ROUND_DOWN_ULL(INTEL_SNPS_PHY_HDMI_16GHZ, datarate)); + tx_clk_div = ilog2(div64_u64(INTEL_SNPS_PHY_HDMI_16GHZ, datarate)); } vco_clk = (datarate << tx_clk_div) >> 1; diff --git a/drivers/gpu/drm/i915/display/intel_vblank.c b/drivers/gpu/drm/i915/display/intel_vblank.c index 4efd4f7d497abb..7b240ce681a01a 100644 --- a/drivers/gpu/drm/i915/display/intel_vblank.c +++ b/drivers/gpu/drm/i915/display/intel_vblank.c @@ -222,7 +222,9 @@ int intel_crtc_scanline_offset(const struct intel_crtc_state *crtc_state) * However if queried just before the start of vblank we'll get an * answer that's slightly in the future. 
*/ - if (DISPLAY_VER(display) == 2) + if (DISPLAY_VER(display) >= 20 || display->platform.battlemage) + return 1; + else if (DISPLAY_VER(display) == 2) return -1; else if (HAS_DDI(display) && intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) return 2; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 9378d5901c4939..9ca42589da4dad 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -117,21 +117,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | GEN6_RC_CTL_EI_MODE(1); - /* - * BSpec 52698 - Render powergating must be off. - * FIXME BSpec is outdated, disabling powergating for MTL is just - * temporary wa and should be removed after fixing real cause - * of forcewake timeouts. - */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) - pg_enable = - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; - else - pg_enable = - GEN9_RENDER_PG_ENABLE | - GEN9_MEDIA_PG_ENABLE | - GEN11_MEDIA_SAMPLER_PG_ENABLE; + pg_enable = + GEN9_RENDER_PG_ENABLE | + GEN9_MEDIA_PG_ENABLE | + GEN11_MEDIA_SAMPLER_PG_ENABLE; if (GRAPHICS_VER(gt->i915) >= 12 && !IS_DG1(gt->i915)) { for (i = 0; i < I915_MAX_VCS; i++) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 64e9317f58fb6f..71ee01d9ef642c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1001,6 +1001,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1029,11 +1033,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; 
- /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f8cb7c630d5b83..127316d2c8aa99 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -633,7 +633,7 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, atomic_inc(&guc->outstanding_submission_g2h); ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - if (ret) + if (ret && g2h_len_dw) atomic_dec(&guc->outstanding_submission_g2h); return ret; @@ -3443,18 +3443,29 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) * GuC is active, lets destroy this context, but at this point we can still be racing * with suspend, so we undo everything if the H2G fails in deregister_context so * that GuC reset will find this context during clean up. + * + * There is a race condition where the reset code could have altered + * this context's state and done a wakeref put before we try to + * deregister it here. So check if the context is still set to be + * destroyed before undoing earlier changes, to avoid two wakeref puts + * on the same context. 
*/ ret = deregister_context(ce, ce->guc_id.id); if (ret) { + bool pending_destroyed; spin_lock_irqsave(&ce->guc_state.lock, flags); - set_context_registered(ce); - clr_context_destroyed(ce); + pending_destroyed = context_destroyed(ce); + if (pending_destroyed) { + set_context_registered(ce); + clr_context_destroyed(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. */ - intel_wakeref_put_async(&gt->wakeref); + if (pending_destroyed) + intel_wakeref_put_async(&gt->wakeref); } return ret; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index d791f9baa11d1f..456d3372eef840 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -317,6 +317,11 @@ void intel_huc_init_early(struct intel_huc *huc) } } +void intel_huc_fini_late(struct intel_huc *huc) +{ + delayed_huc_load_fini(huc); +} + #define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy") static int check_huc_loading_mode(struct intel_huc *huc) { @@ -414,12 +419,6 @@ int intel_huc_init(struct intel_huc *huc) void intel_huc_fini(struct intel_huc *huc) { - /* - * the fence is initialized in init_early, so we need to clean it up - * even if HuC loading is off.
- */ - delayed_huc_load_fini(huc); - if (huc->heci_pkt) i915_vma_unpin_and_release(&huc->heci_pkt, 0); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index d5e441b9e08d63..921ad4b1687f0b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -55,6 +55,7 @@ struct intel_huc { int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); +void intel_huc_fini_late(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc, enum intel_huc_authentication_type type); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 90ba1b0b4c9d25..4a3493e8d4333e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -136,6 +136,7 @@ void intel_uc_init_late(struct intel_uc *uc) void intel_uc_driver_late_release(struct intel_uc *uc) { + intel_huc_fini_late(&uc->huc); } /** diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 509f9ccae3a9f0..dbad4d853d3ade 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -222,7 +222,6 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) u8 *buf; struct opregion_header *header; struct vbt v; - const char opregion_signature[16] = OPREGION_SIGNATURE; gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | @@ -236,8 +235,10 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) /* emulated opregion with VBT mailbox only */ buf = (u8 *)vgpu_opregion(vgpu)->va; header = (struct opregion_header *)buf; - memcpy(header->signature, opregion_signature, - sizeof(opregion_signature)); + + static_assert(sizeof(header->signature) == sizeof(OPREGION_SIGNATURE) - 1); + memcpy(header->signature, OPREGION_SIGNATURE, 
sizeof(header->signature)); + header->size = 0x8; header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ffc346379cc2c2..54538b6f85df5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -305,6 +305,7 @@ struct drm_i915_private { INTEL_DRAM_DDR5, INTEL_DRAM_LPDDR5, INTEL_DRAM_GDDR, + INTEL_DRAM_GDDR_ECC, } type; u8 num_qgv_points; u8 num_psf_gv_points; diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h index 9aae779c4da318..4969d3de2bac3d 100644 --- a/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.h @@ -23,6 +23,7 @@ int intel_pxp_gsccs_init(struct intel_pxp *pxp); int intel_pxp_gsccs_create_session(struct intel_pxp *pxp, int arb_session_id); void intel_pxp_gsccs_end_arb_fw_session(struct intel_pxp *pxp, u32 arb_session_id); +bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); #else static inline void intel_pxp_gsccs_fini(struct intel_pxp *pxp) @@ -34,8 +35,11 @@ static inline int intel_pxp_gsccs_init(struct intel_pxp *pxp) return 0; } -#endif +static inline bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp) +{ + return false; +} -bool intel_pxp_gsccs_is_ready_for_sessions(struct intel_pxp *pxp); +#endif #endif /*__INTEL_PXP_GSCCS_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index fee76c1d2f4500..889281819c5b13 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -23,7 +23,9 @@ #include <linux/random.h> +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_regs.h" #include "gt/uc/intel_gsc_fw.h" #include "i915_driver.h" @@ -253,11 +255,27 @@ int i915_mock_selftests(void) int i915_live_selftests(struct pci_dev *pdev) { struct drm_i915_private *i915 = pdev_to_i915(pdev); + struct
intel_uncore *uncore = &i915->uncore; int err; + u32 pg_enable; + intel_wakeref_t wakeref; if (!i915_selftest.live) return 0; + /* + * FIXME Disable render powergating, this is temporary wa and should be removed + * after fixing real cause of forcewake timeouts. + */ + with_intel_runtime_pm(uncore->rpm, wakeref) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 00), IP_VER(12, 74))) { + pg_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE); + if (pg_enable & GEN9_RENDER_PG_ENABLE) + intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, + pg_enable & ~GEN9_RENDER_PG_ENABLE); + } + } + __wait_gsc_proxy_completed(i915); __wait_gsc_huc_load_completed(i915); diff --git a/drivers/gpu/drm/i915/soc/intel_dram.c b/drivers/gpu/drm/i915/soc/intel_dram.c index 9e310f4099f423..f60eedb0e92cfe 100644 --- a/drivers/gpu/drm/i915/soc/intel_dram.c +++ b/drivers/gpu/drm/i915/soc/intel_dram.c @@ -687,6 +687,10 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) drm_WARN_ON(&i915->drm, !IS_DGFX(i915)); dram_info->type = INTEL_DRAM_GDDR; break; + case 9: + drm_WARN_ON(&i915->drm, !IS_DGFX(i915)); + dram_info->type = INTEL_DRAM_GDDR_ECC; + break; default: MISSING_CASE(val); return -EINVAL; diff --git a/drivers/gpu/drm/imagination/pvr_fw.c b/drivers/gpu/drm/imagination/pvr_fw.c index 3debc9870a82ae..d09c4c68411627 100644 --- a/drivers/gpu/drm/imagination/pvr_fw.c +++ b/drivers/gpu/drm/imagination/pvr_fw.c @@ -732,7 +732,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) fw_mem->core_data, fw_mem->core_code_alloc_size); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; memcpy(fw_code_ptr, fw_mem->code, fw_mem->code_alloc_size); memcpy(fw_data_ptr, fw_mem->data, fw_mem->data_alloc_size); @@ -742,10 +742,14 @@ pvr_fw_process(struct pvr_device *pvr_dev) memcpy(fw_core_data_ptr, fw_mem->core_data, fw_mem->core_data_alloc_size); /* We're finished with the firmware section memory on the CPU, unmap. 
*/ - if (fw_core_data_ptr) + if (fw_core_data_ptr) { pvr_fw_object_vunmap(fw_mem->core_data_obj); - if (fw_core_code_ptr) + fw_core_data_ptr = NULL; + } + if (fw_core_code_ptr) { pvr_fw_object_vunmap(fw_mem->core_code_obj); + fw_core_code_ptr = NULL; + } pvr_fw_object_vunmap(fw_mem->data_obj); fw_data_ptr = NULL; pvr_fw_object_vunmap(fw_mem->code_obj); @@ -753,7 +757,7 @@ pvr_fw_process(struct pvr_device *pvr_dev) err = pvr_fw_create_fwif_connection_ctl(pvr_dev); if (err) - goto err_free_fw_core_data_obj; + goto err_free_kdata; return 0; @@ -763,13 +767,16 @@ pvr_fw_process(struct pvr_device *pvr_dev) kfree(fw_mem->data); kfree(fw_mem->code); -err_free_fw_core_data_obj: if (fw_core_data_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_data_obj); + pvr_fw_object_vunmap(fw_mem->core_data_obj); + if (fw_mem->core_data_obj) + pvr_fw_object_destroy(fw_mem->core_data_obj); err_free_fw_core_code_obj: if (fw_core_code_ptr) - pvr_fw_object_unmap_and_destroy(fw_mem->core_code_obj); + pvr_fw_object_vunmap(fw_mem->core_code_obj); + if (fw_mem->core_code_obj) + pvr_fw_object_destroy(fw_mem->core_code_obj); err_free_fw_data_obj: if (fw_data_ptr) @@ -836,6 +843,12 @@ pvr_fw_cleanup(struct pvr_device *pvr_dev) struct pvr_fw_mem *fw_mem = &pvr_dev->fw_dev.mem; pvr_fw_fini_fwif_connection_ctl(pvr_dev); + + kfree(fw_mem->core_data); + kfree(fw_mem->core_code); + kfree(fw_mem->data); + kfree(fw_mem->code); + if (fw_mem->core_code_obj) pvr_fw_object_destroy(fw_mem->core_code_obj); if (fw_mem->core_data_obj) diff --git a/drivers/gpu/drm/imagination/pvr_job.c b/drivers/gpu/drm/imagination/pvr_job.c index 1cdb3cfd058d7d..59b334d094fa82 100644 --- a/drivers/gpu/drm/imagination/pvr_job.c +++ b/drivers/gpu/drm/imagination/pvr_job.c @@ -671,6 +671,13 @@ pvr_jobs_link_geom_frag(struct pvr_job_data *job_data, u32 *job_count) geom_job->paired_job = frag_job; frag_job->paired_job = geom_job; + /* The geometry job pvr_job structure is used when the fragment + * job is being prepared by the GPU 
scheduler. Have the fragment + * job hold a reference on the geometry job to prevent it being + * freed until the fragment job has finished with it. + */ + pvr_job_get(geom_job); + /* Skip the fragment job we just paired to the geometry job. */ i++; } diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index eba69309bb6cf3..5e9bc0992824f3 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -866,6 +866,10 @@ static void pvr_queue_free_job(struct drm_sched_job *sched_job) struct pvr_job *job = container_of(sched_job, struct pvr_job, base); drm_sched_job_cleanup(sched_job); + + if (job->type == DRM_PVR_JOB_TYPE_FRAGMENT && job->paired_job) + pvr_job_put(job->paired_job); + job->paired_job = NULL; pvr_job_put(job); } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 74158b9d65035b..7c0c12dde48859 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -470,7 +470,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) ret = drmm_mode_config_init(drm); if (ret) - goto put_mutex_dev; + return ret; drm->mode_config.min_width = 64; drm->mode_config.min_height = 64; @@ -488,8 +488,11 @@ static int mtk_drm_kms_init(struct drm_device *drm) for (i = 0; i < private->data->mmsys_dev_num; i++) { drm->dev_private = private->all_drm_private[i]; ret = component_bind_all(private->all_drm_private[i]->dev, drm); - if (ret) - goto put_mutex_dev; + if (ret) { + while (--i >= 0) + component_unbind_all(private->all_drm_private[i]->dev, drm); + return ret; + } } /* @@ -582,9 +585,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) err_component_unbind: for (i = 0; i < private->data->mmsys_dev_num; i++) component_unbind_all(private->all_drm_private[i]->dev, drm); -put_mutex_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) - put_device(private->all_drm_private[i]->mutex_dev); return ret; } @@ 
-655,8 +655,10 @@ static int mtk_drm_bind(struct device *dev) return 0; drm = drm_dev_alloc(&mtk_drm_driver, dev); - if (IS_ERR(drm)) - return PTR_ERR(drm); + if (IS_ERR(drm)) { + ret = PTR_ERR(drm); + goto err_put_dev; + } private->drm_master = true; drm->dev_private = private; @@ -682,18 +684,31 @@ static int mtk_drm_bind(struct device *dev) drm_dev_put(drm); for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; +err_put_dev: + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } + put_device(private->mutex_dev); return ret; } static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); + int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); + + for (i = 0; i < private->data->mmsys_dev_num; i++) { + /* For device_find_child in mtk_drm_get_all_priv() */ + put_device(private->all_drm_private[i]->dev); + } + put_device(private->mutex_dev); } private->mtk_drm_bound = false; private->drm_master = false; diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 81d2ee37e7732d..49ff9f1f16d32a 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -169,7 +169,7 @@ static const struct meson_drm_soc_attr meson_drm_soc_attrs[] = { /* S805X/S805Y HDMI PLL won't lock for HDMI PHY freq > 1,65GHz */ { .limits = { - .max_hdmi_phy_freq = 1650000, + .max_hdmi_phy_freq = 1650000000, }, .attrs = (const struct soc_device_attribute []) { { .soc_id = "GXL (S805*)", }, diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h index 3f9345c14f31c1..be4b0e4df6e13e 100644 --- a/drivers/gpu/drm/meson/meson_drv.h +++ b/drivers/gpu/drm/meson/meson_drv.h @@ -37,7 +37,7 @@ struct 
meson_drm_match_data { }; struct meson_drm_soc_limits { - unsigned int max_hdmi_phy_freq; + unsigned long long max_hdmi_phy_freq; }; struct meson_drm { diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 6d1c9262a2cfb7..2bccda1e52a17d 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -70,12 +70,12 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, { struct meson_drm *priv = encoder_hdmi->priv; int vic = drm_match_cea_mode(mode); - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; - vclk_freq = mode->clock; + vclk_freq = mode->clock * 1000ULL; /* For 420, pixel clock is half unlike venc clock */ if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24) @@ -107,7 +107,8 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "vclk:%d phy=%d venc=%d hdmi=%d enci=%d\n", + dev_dbg(priv->dev, + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); @@ -122,10 +123,11 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge); struct meson_drm *priv = encoder_hdmi->priv; bool is_hdmi2_sink = display_info->hdmi.scdc.supported; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int hdmi_freq; + unsigned long long clock = mode->clock * 1000ULL; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long hdmi_freq; int vic = drm_match_cea_mode(mode); enum drm_mode_status status; 
@@ -144,12 +146,12 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (status != MODE_OK) return status; - return meson_vclk_dmt_supported_freq(priv, mode->clock); + return meson_vclk_dmt_supported_freq(priv, clock); /* Check against supported VIC modes */ } else if (!meson_venc_hdmi_supported_vic(vic)) return MODE_BAD; - vclk_freq = mode->clock; + vclk_freq = clock; /* For 420, pixel clock is half unlike venc clock */ if (drm_mode_is_420_only(display_info, mode) || @@ -179,7 +181,8 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri if (mode->flags & DRM_MODE_FLAG_DBLCLK) venc_freq /= 2; - dev_dbg(priv->dev, "%s: vclk:%d phy=%d venc=%d hdmi=%d\n", + dev_dbg(priv->dev, + "%s: vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz\n", __func__, phy_freq, vclk_freq, venc_freq, hdmi_freq); return meson_vclk_vic_supported_freq(priv, phy_freq, vclk_freq); diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 2a942dc6a6dc23..dfe0c28a0f054c 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,7 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST(_freq * 1000, 1001) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -360,11 +360,11 @@ enum { }; struct meson_vclk_params { - unsigned int pll_freq; - unsigned int phy_freq; - unsigned int vclk_freq; - unsigned int venc_freq; - unsigned int pixel_freq; + unsigned long long pll_freq; + unsigned long long phy_freq; + unsigned long long vclk_freq; + unsigned long long venc_freq; + unsigned long long pixel_freq; unsigned int pll_od1; unsigned int pll_od2; unsigned int pll_od3; @@ -372,11 +372,11 @@ struct meson_vclk_params { unsigned int vclk_div; } params[] = { [MESON_VCLK_HDMI_ENCI_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 
54000, - .venc_freq = 54000, - .pixel_freq = 54000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 54000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -384,11 +384,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_54000] = { - .pll_freq = 4320000, - .phy_freq = 270000, - .vclk_freq = 54000, - .venc_freq = 54000, - .pixel_freq = 27000, + .pll_freq = 4320000000, + .phy_freq = 270000000, + .vclk_freq = 54000000, + .venc_freq = 54000000, + .pixel_freq = 27000000, .pll_od1 = 4, .pll_od2 = 4, .pll_od3 = 1, @@ -396,11 +396,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_DDR_148500] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 74250000, .pll_od1 = 4, .pll_od2 = 1, .pll_od3 = 1, @@ -408,11 +408,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_74250] = { - .pll_freq = 2970000, - .phy_freq = 742500, - .vclk_freq = 74250, - .venc_freq = 74250, - .pixel_freq = 74250, + .pll_freq = 2970000000, + .phy_freq = 742500000, + .vclk_freq = 74250000, + .venc_freq = 74250000, + .pixel_freq = 74250000, .pll_od1 = 2, .pll_od2 = 2, .pll_od3 = 2, @@ -420,11 +420,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_148500] = { - .pll_freq = 2970000, - .phy_freq = 1485000, - .vclk_freq = 148500, - .venc_freq = 148500, - .pixel_freq = 148500, + .pll_freq = 2970000000, + .phy_freq = 1485000000, + .vclk_freq = 148500000, + .venc_freq = 148500000, + .pixel_freq = 148500000, .pll_od1 = 1, .pll_od2 = 2, .pll_od3 = 2, @@ -432,11 +432,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_297000] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 297000, - .vclk_freq = 297000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 
2970000000, + .venc_freq = 297000000, + .vclk_freq = 297000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -444,11 +444,11 @@ struct meson_vclk_params { .vclk_div = 2, }, [MESON_VCLK_HDMI_594000] = { - .pll_freq = 5940000, - .phy_freq = 5940000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 594000, + .pll_freq = 5940000000, + .phy_freq = 5940000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 594000000, .pll_od1 = 1, .pll_od2 = 1, .pll_od3 = 2, @@ -456,11 +456,11 @@ struct meson_vclk_params { .vclk_div = 1, }, [MESON_VCLK_HDMI_594000_YUV420] = { - .pll_freq = 5940000, - .phy_freq = 2970000, - .venc_freq = 594000, - .vclk_freq = 594000, - .pixel_freq = 297000, + .pll_freq = 5940000000, + .phy_freq = 2970000000, + .venc_freq = 594000000, + .vclk_freq = 594000000, + .pixel_freq = 297000000, .pll_od1 = 2, .pll_od2 = 1, .pll_od3 = 1, @@ -617,16 +617,16 @@ static void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m, 3 << 20, pll_od_to_reg(od3) << 20); } -#define XTAL_FREQ 24000 +#define XTAL_FREQ (24 * 1000 * 1000) static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long long pll_freq) { /* The GXBB PLL has a /2 pre-multiplier */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) - pll_freq /= 2; + pll_freq = DIV_ROUND_DOWN_ULL(pll_freq, 2); - return pll_freq / XTAL_FREQ; + return DIV_ROUND_DOWN_ULL(pll_freq, XTAL_FREQ); } #define HDMI_FRAC_MAX_GXBB 4096 @@ -635,12 +635,13 @@ static unsigned int meson_hdmi_pll_get_m(struct meson_drm *priv, static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, unsigned int m, - unsigned int pll_freq) + unsigned long long pll_freq) { - unsigned int parent_freq = XTAL_FREQ; + unsigned long long parent_freq = XTAL_FREQ; unsigned int frac_max = HDMI_FRAC_MAX_GXL; unsigned int frac_m; unsigned int frac; + u32 remainder; /* The GXBB PLL has a /2 pre-multiplier and a larger FRAC width */ if 
(meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -652,11 +653,11 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, frac_max = HDMI_FRAC_MAX_G12A; /* We can have a perfect match !*/ - if (pll_freq / m == parent_freq && - pll_freq % m == 0) + if (div_u64_rem(pll_freq, m, &remainder) == parent_freq && + remainder == 0) return 0; - frac = div_u64((u64)pll_freq * (u64)frac_max, parent_freq); + frac = mul_u64_u64_div_u64(pll_freq, frac_max, parent_freq); frac_m = m * frac_max; if (frac_m > frac) return frac_max; @@ -666,7 +667,7 @@ static unsigned int meson_hdmi_pll_get_frac(struct meson_drm *priv, } static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, - unsigned int m, + unsigned long long m, unsigned int frac) { if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { @@ -694,7 +695,7 @@ static bool meson_hdmi_pll_validate_params(struct meson_drm *priv, } static bool meson_hdmi_pll_find_params(struct meson_drm *priv, - unsigned int freq, + unsigned long long freq, unsigned int *m, unsigned int *frac, unsigned int *od) @@ -706,7 +707,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, continue; *frac = meson_hdmi_pll_get_frac(priv, *m, freq * *od); - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d\n", freq, *m, *frac, *od); if (meson_hdmi_pll_validate_params(priv, *m, *frac)) @@ -718,7 +719,7 @@ static bool meson_hdmi_pll_find_params(struct meson_drm *priv, /* pll_freq is the frequency after the OD dividers */ enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq) +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq) { unsigned int od, m, frac; @@ -741,7 +742,7 @@ EXPORT_SYMBOL_GPL(meson_vclk_dmt_supported_freq); /* pll_freq is the frequency after the OD dividers */ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, - unsigned int pll_freq) + unsigned long 
long pll_freq) { unsigned int od, m, frac, od1, od2, od3; @@ -756,7 +757,7 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, od1 = od / od2; } - DRM_DEBUG_DRIVER("PLL params for %dkHz: m=%x frac=%x od=%d/%d/%d\n", + DRM_DEBUG_DRIVER("PLL params for %lluHz: m=%x frac=%x od=%d/%d/%d\n", pll_freq, m, frac, od1, od2, od3); meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); @@ -764,17 +765,48 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, return; } - DRM_ERROR("Fatal, unable to find parameters for PLL freq %d\n", + DRM_ERROR("Fatal, unable to find parameters for PLL freq %lluHz\n", pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). 
+ */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq) +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq) { int i; - DRM_DEBUG_DRIVER("phy_freq = %d vclk_freq = %d\n", + DRM_DEBUG_DRIVER("phy_freq = %lluHz vclk_freq = %lluHz\n", phy_freq, vclk_freq); /* Check against soc revision/package limits */ @@ -785,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %d alt = %d\n", - i, params[i].pixel_freq, - FREQ_1000_1001(params[i].pixel_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %d alt = %d\n", - i, params[i].phy_freq, - FREQ_1000_1001(params[i].phy_freq/1000)*1000); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == (FREQ_1000_1001(params[i].phy_freq/1000)*1000) && - vclk_freq == FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -805,8 +825,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, } EXPORT_SYMBOL_GPL(meson_vclk_vic_supported_freq); -static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, - unsigned int od1, unsigned int od2, unsigned int od3, +static void meson_vclk_set(struct meson_drm *priv, + unsigned long long pll_base_freq, unsigned int od1, + unsigned int od2, unsigned int od3, unsigned int vid_pll_div, unsigned int vclk_div, unsigned int hdmi_tx_div, unsigned int venc_div, bool hdmi_use_enci, bool vic_alternate_clock) @@ -826,15 +847,15 @@ static void 
meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_generic_set(priv, pll_base_freq); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x3d; frac = vic_alternate_clock ? 0xd02 : 0xe00; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0x59 : 0x5a; frac = vic_alternate_clock ? 0xe8f : 0; break; - case 5940000: + case 5940000000: m = 0x7b; frac = vic_alternate_clock ? 0xa05 : 0xc00; break; @@ -844,15 +865,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x281 : 0x300; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x347 : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 0x102 : 0x200; break; @@ -861,15 +882,15 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, meson_hdmi_pll_set_params(priv, m, frac, od1, od2, od3); } else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { switch (pll_base_freq) { - case 2970000: + case 2970000000: m = 0x7b; frac = vic_alternate_clock ? 0x140b4 : 0x18000; break; - case 4320000: + case 4320000000: m = vic_alternate_clock ? 0xb3 : 0xb4; frac = vic_alternate_clock ? 0x1a3ee : 0; break; - case 5940000: + case 5940000000: m = 0xf7; frac = vic_alternate_clock ? 
0x8148 : 0x10000; break; @@ -1025,14 +1046,14 @@ static void meson_vclk_set(struct meson_drm *priv, unsigned int pll_base_freq, } void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci) { bool vic_alternate_clock = false; - unsigned int freq; - unsigned int hdmi_tx_div; - unsigned int venc_div; + unsigned long long freq; + unsigned long long hdmi_tx_div; + unsigned long long venc_div; if (target == MESON_VCLK_TARGET_CVBS) { meson_venci_cvbs_clock_config(priv); @@ -1052,27 +1073,25 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, return; } - hdmi_tx_div = vclk_freq / dac_freq; + hdmi_tx_div = DIV_ROUND_DOWN_ULL(vclk_freq, dac_freq); if (hdmi_tx_div == 0) { - pr_err("Fatal Error, invalid HDMI-TX freq %d\n", + pr_err("Fatal Error, invalid HDMI-TX freq %lluHz\n", dac_freq); return; } - venc_div = vclk_freq / venc_freq; + venc_div = DIV_ROUND_DOWN_ULL(vclk_freq, venc_freq); if (venc_div == 0) { - pr_err("Fatal Error, invalid HDMI venc freq %d\n", + pr_err("Fatal Error, invalid HDMI venc freq %lluHz\n", venc_freq); return; } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == FREQ_1000_1001(params[freq].phy_freq/1000)*1000) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else @@ -1098,7 +1117,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } if (!params[freq].pixel_freq) { - pr_err("Fatal Error, invalid HDMI vclk freq %d\n", vclk_freq); + pr_err("Fatal Error, invalid HDMI vclk freq %lluHz\n", + vclk_freq); return; } diff --git 
a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h index 60617aaf18dd1c..7ac55744e57494 100644 --- a/drivers/gpu/drm/meson/meson_vclk.h +++ b/drivers/gpu/drm/meson/meson_vclk.h @@ -20,17 +20,18 @@ enum { }; /* 27MHz is the CVBS Pixel Clock */ -#define MESON_VCLK_CVBS 27000 +#define MESON_VCLK_CVBS (27 * 1000 * 1000) enum drm_mode_status -meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned int freq); +meson_vclk_dmt_supported_freq(struct meson_drm *priv, unsigned long long freq); enum drm_mode_status -meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned int phy_freq, - unsigned int vclk_freq); +meson_vclk_vic_supported_freq(struct meson_drm *priv, + unsigned long long phy_freq, + unsigned long long vclk_freq); void meson_vclk_setup(struct meson_drm *priv, unsigned int target, - unsigned int phy_freq, unsigned int vclk_freq, - unsigned int venc_freq, unsigned int dac_freq, + unsigned long long phy_freq, unsigned long long vclk_freq, + unsigned long long venc_freq, unsigned long long dac_freq, bool hdmi_use_enci); #endif /* __MESON_VCLK_H */ diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fb71658c3117b2..6067d08aeee34b 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -223,7 +223,7 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod vsyncstr = mode->crtc_vsync_start - 1; vsyncend = mode->crtc_vsync_end - 1; vtotal = mode->crtc_vtotal - 2; - vblkstr = mode->crtc_vblank_start; + vblkstr = mode->crtc_vblank_start - 1; vblkend = vtotal + 1; linecomp = vdispend; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 06465bc2d0b4b1..90991ba5a4ae10 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -242,10 +242,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; 
case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -377,10 +377,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -655,7 +655,6 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) if (adreno_is_7c3(gpu)) { gpu->ubwc_config.highest_bank_bit = 14; gpu->ubwc_config.amsbc = 1; - gpu->ubwc_config.rgb565_predicator = 1; gpu->ubwc_config.uavflagprd_inv = 2; gpu->ubwc_config.macrotile_mode = 1; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h index 1f32807bb5e5d4..39027a21c6feec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_14_msm8937.h @@ -100,14 +100,12 @@ static const struct dpu_pingpong_cfg msm8937_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), }, { .name = "pingpong_1", .id = PINGPONG_1, .base = 0x70800, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13), @@ -132,7 +130,6 @@ static const struct dpu_intf_cfg 
msm8937_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -141,7 +138,6 @@ static const struct dpu_intf_cfg msm8937_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h index 42131959ff2202..8d1b43ea1663cf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_15_msm8917.h @@ -93,7 +93,6 @@ static const struct dpu_pingpong_cfg msm8917_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), @@ -118,7 +117,6 @@ static const struct dpu_intf_cfg msm8917_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h index 2b4723a5c67606..16c12499b24bb4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_16_msm8953.h @@ -100,14 +100,12 @@ static const struct dpu_pingpong_cfg msm8953_pp[] = { { .name = "pingpong_0", .id = PINGPONG_0, .base = 0x70000, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12), }, { .name = "pingpong_1", .id = 
PINGPONG_1, .base = 0x70800, .len = 0xd4, - .features = PINGPONG_MSM8996_MASK, .sblk = &msm8996_pp_sblk, .intr_done = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 9), .intr_rdptr = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13), @@ -131,7 +129,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x268, @@ -140,7 +137,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -149,7 +145,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = { .prog_fetch_lines_worst_case = 14, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h index 5cf19de71f0608..ae18a354e5d2a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_1_7_msm8996.h @@ -241,7 +241,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x268, @@ -250,7 +249,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x268, @@ -259,7 +257,6 @@ static 
const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, { .name = "intf_3", .id = INTF_3, .base = 0x6b800, .len = 0x268, @@ -267,7 +264,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = { .prog_fetch_lines_worst_case = 25, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h index 4f2f68b07f203a..bb89da0a481dec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_2_sdm660.h @@ -202,7 +202,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x280, @@ -211,7 +210,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, { .name = "intf_2", .id = INTF_2, .base = 0x6b000, .len = 0x280, @@ -220,7 +218,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h index c70bef025ac419..7caf876ca3e30c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_3_3_sdm630.h @@ -147,7 +147,6 @@ static const struct 
dpu_intf_cfg sdm630_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25), - .intr_tear_rd_ptr = -1, }, { .name = "intf_1", .id = INTF_1, .base = 0x6a800, .len = 0x280, @@ -156,7 +155,6 @@ static const struct dpu_intf_cfg sdm630_intf[] = { .prog_fetch_lines_worst_case = 21, .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26), .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27), - .intr_tear_rd_ptr = -1, }, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 979527d98fbcb1..8e23dbfeef3543 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -76,7 +76,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -84,7 +84,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -92,7 +92,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -100,7 +100,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -108,7 +108,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_8", 
.id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -116,7 +116,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -124,7 +124,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -132,7 +132,7 @@ static const struct dpu_sspp_cfg sm8150_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index d76b8992a6c18c..e736eb73a7e615 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -75,7 +75,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { { .name = "sspp_0", .id = SSPP_VIG0, .base = 0x4000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 0, .type = SSPP_TYPE_VIG, @@ -83,7 +83,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_1", .id = SSPP_VIG1, .base = 0x6000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 4, .type = SSPP_TYPE_VIG, @@ -91,7 +91,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_2", .id = SSPP_VIG2, .base = 0x8000, .len = 0x1f0, - 
.features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 8, .type = SSPP_TYPE_VIG, @@ -99,7 +99,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_3", .id = SSPP_VIG3, .base = 0xa000, .len = 0x1f0, - .features = VIG_SDM845_MASK, + .features = VIG_SDM845_MASK_SDMA, .sblk = &dpu_vig_sblk_qseed3_1_4, .xin_id = 12, .type = SSPP_TYPE_VIG, @@ -107,7 +107,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_8", .id = SSPP_DMA0, .base = 0x24000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 1, .type = SSPP_TYPE_DMA, @@ -115,7 +115,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_9", .id = SSPP_DMA1, .base = 0x26000, .len = 0x1f0, - .features = DMA_SDM845_MASK, + .features = DMA_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 5, .type = SSPP_TYPE_DMA, @@ -123,7 +123,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_10", .id = SSPP_DMA2, .base = 0x28000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 9, .type = SSPP_TYPE_DMA, @@ -131,7 +131,7 @@ static const struct dpu_sspp_cfg sc8180x_sspp[] = { }, { .name = "sspp_11", .id = SSPP_DMA3, .base = 0x2a000, .len = 0x1f0, - .features = DMA_CURSOR_SDM845_MASK, + .features = DMA_CURSOR_SDM845_MASK_SDMA, .sblk = &dpu_dma_sblk, .xin_id = 13, .type = SSPP_TYPE_DMA, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 8610bbf2b87ca2..862e9e6bf0a552 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1666,7 +1666,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc, */ static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) { - struct dpu_encoder_virt *dpu_enc = 
to_dpu_encoder_virt(phys->parent); + struct dpu_encoder_virt *dpu_enc; if (!phys) { DPU_ERROR("invalid argument(s)\n"); @@ -1678,6 +1678,8 @@ static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys) return; } + dpu_enc = to_dpu_encoder_virt(phys->parent); + if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL && dpu_enc->cwb_mask) { DPU_DEBUG("encoder %d CWB enabled, skipping\n", DRMID(phys->parent)); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index af3e541f60c303..e03d6091f73640 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -729,12 +729,40 @@ static int dpu_plane_check_inline_rotation(struct dpu_plane *pdpu, static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe, struct dpu_sw_pipe_cfg *pipe_cfg, - const struct msm_format *fmt, - const struct drm_display_mode *mode) + const struct drm_display_mode *mode, + struct drm_plane_state *new_plane_state) { uint32_t min_src_size; struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base); int ret; + const struct msm_format *fmt; + uint32_t supported_rotations; + const struct dpu_sspp_cfg *pipe_hw_caps; + const struct dpu_sspp_sub_blks *sblk; + + pipe_hw_caps = pipe->sspp->cap; + sblk = pipe->sspp->cap->sblk; + + /* + * We already have verified scaling against platform limitations. + * Now check if the SSPP supports scaling at all. 
+ */ + if (!sblk->scaler_blk.len && + ((drm_rect_width(&new_plane_state->src) >> 16 != + drm_rect_width(&new_plane_state->dst)) || + (drm_rect_height(&new_plane_state->src) >> 16 != + drm_rect_height(&new_plane_state->dst)))) + return -ERANGE; + + fmt = msm_framebuffer_format(new_plane_state->fb); + + supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; + + if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) + supported_rotations |= DRM_MODE_ROTATE_90; + + pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, + supported_rotations); min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; @@ -923,47 +951,20 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane, struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state); struct dpu_sw_pipe *pipe = &pstate->pipe; struct dpu_sw_pipe *r_pipe = &pstate->r_pipe; - const struct msm_format *fmt; struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg; struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg; - uint32_t supported_rotations; - const struct dpu_sspp_cfg *pipe_hw_caps; - const struct dpu_sspp_sub_blks *sblk; int ret = 0; - pipe_hw_caps = pipe->sspp->cap; - sblk = pipe->sspp->cap->sblk; - - /* - * We already have verified scaling against platform limitations. - * Now check if the SSPP supports scaling at all. 
- */ - if (!sblk->scaler_blk.len && - ((drm_rect_width(&new_plane_state->src) >> 16 != - drm_rect_width(&new_plane_state->dst)) || - (drm_rect_height(&new_plane_state->src) >> 16 != - drm_rect_height(&new_plane_state->dst)))) - return -ERANGE; - - fmt = msm_framebuffer_format(new_plane_state->fb); - - supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0; - - if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION)) - supported_rotations |= DRM_MODE_ROTATE_90; - - pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation, - supported_rotations); - r_pipe_cfg->rotation = pipe_cfg->rotation; - - ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, - &crtc_state->adjusted_mode); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, + &crtc_state->adjusted_mode, + new_plane_state); if (ret) return ret; if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) { - ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt, - &crtc_state->adjusted_mode); + ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, + &crtc_state->adjusted_mode, + new_plane_state); if (ret) return ret; } @@ -1059,6 +1060,9 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane, struct drm_crtc_state *crtc_state; int ret; + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + if (plane_state->crtc) crtc_state = drm_atomic_get_new_crtc_state(state, plane_state->crtc); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index bbc47d86ae9e67..ab8c1f19dcb42d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -367,17 +367,21 @@ static int msm_dp_display_send_hpd_notification(struct msm_dp_display_private *d return 0; } -static void msm_dp_display_lttpr_init(struct msm_dp_display_private *dp) +static int msm_dp_display_lttpr_init(struct msm_dp_display_private *dp, u8 *dpcd) { - u8 lttpr_caps[DP_LTTPR_COMMON_CAP_SIZE]; - int rc; + int rc, lttpr_count; - if 
(drm_dp_read_lttpr_common_caps(dp->aux, dp->panel->dpcd, lttpr_caps)) - return; + if (drm_dp_read_lttpr_common_caps(dp->aux, dpcd, dp->link->lttpr_common_caps)) + return 0; - rc = drm_dp_lttpr_init(dp->aux, drm_dp_lttpr_count(lttpr_caps)); - if (rc) + lttpr_count = drm_dp_lttpr_count(dp->link->lttpr_common_caps); + rc = drm_dp_lttpr_init(dp->aux, lttpr_count); + if (rc) { DRM_ERROR("failed to set LTTPRs transparency mode, rc=%d\n", rc); + return 0; + } + + return lttpr_count; } static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) @@ -385,12 +389,17 @@ static int msm_dp_display_process_hpd_high(struct msm_dp_display_private *dp) struct drm_connector *connector = dp->msm_dp_display.connector; const struct drm_display_info *info = &connector->display_info; int rc = 0; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; - rc = msm_dp_panel_read_sink_caps(dp->panel, connector); + rc = drm_dp_read_dpcd_caps(dp->aux, dpcd); if (rc) goto end; - msm_dp_display_lttpr_init(dp); + dp->link->lttpr_count = msm_dp_display_lttpr_init(dp, dpcd); + + rc = msm_dp_panel_read_sink_caps(dp->panel, connector); + if (rc) + goto end; msm_dp_link_process_request(dp->link); diff --git a/drivers/gpu/drm/msm/dp/dp_link.h b/drivers/gpu/drm/msm/dp/dp_link.h index 8db5d5698a97cf..ba47c6d19fbfac 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.h +++ b/drivers/gpu/drm/msm/dp/dp_link.h @@ -7,6 +7,7 @@ #define _DP_LINK_H_ #include "dp_aux.h" +#include #define DS_PORT_STATUS_CHANGED 0x200 #define DP_TEST_BIT_DEPTH_UNKNOWN 0xFFFFFFFF @@ -60,6 +61,9 @@ struct msm_dp_link_phy_params { }; struct msm_dp_link { + u8 lttpr_common_caps[DP_LTTPR_COMMON_CAP_SIZE]; + int lttpr_count; + u32 sink_request; u32 test_response; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 92415bf8aa1665..4e8ab75c771b1e 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -47,7 +47,7 @@ static void msm_dp_panel_read_psr_cap(struct msm_dp_panel_private 
*panel) static int msm_dp_panel_read_dpcd(struct msm_dp_panel *msm_dp_panel) { - int rc; + int rc, max_lttpr_lanes, max_lttpr_rate; struct msm_dp_panel_private *panel; struct msm_dp_link_info *link_info; u8 *dpcd, major, minor; @@ -75,6 +75,16 @@ static int msm_dp_panel_read_dpcd(struct msm_dp_panel *msm_dp_panel) if (link_info->rate > msm_dp_panel->max_dp_link_rate) link_info->rate = msm_dp_panel->max_dp_link_rate; + /* Limit data lanes from LTTPR capabilities, if any */ + max_lttpr_lanes = drm_dp_lttpr_max_lane_count(panel->link->lttpr_common_caps); + if (max_lttpr_lanes && max_lttpr_lanes < link_info->num_lanes) + link_info->num_lanes = max_lttpr_lanes; + + /* Limit link rate from LTTPR capabilities, if any */ + max_lttpr_rate = drm_dp_lttpr_max_link_rate(panel->link->lttpr_common_caps); + if (max_lttpr_rate && max_lttpr_rate < link_info->rate) + link_info->rate = max_lttpr_rate; + drm_dbg_dp(panel->drm_dev, "version: %d.%d\n", major, minor); drm_dbg_dp(panel->drm_dev, "link_rate=%d\n", link_info->rate); drm_dbg_dp(panel->drm_dev, "lane_count=%d\n", link_info->num_lanes); diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 55a35182858cca..5a6ae9fc319451 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2259,5 +2259,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index db961eade2257f..2016c1e7242fe3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -144,6 +144,9 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) nouveau_bo_del_io_reserve_lru(bo); nv10_bo_put_tile_region(dev, nvbo->tile, NULL); + if (bo->base.import_attach) + drm_prime_gem_destroy(&bo->base, bo->sg); + /* * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt 
to release it. diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 7cc84472cecec2..edddfc036c6d1e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -90,7 +90,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) while (!list_empty(&fctx->pending)) { fence = list_entry(fctx->pending.next, typeof(*fence), head); - if (error) + if (error && !dma_fence_is_signaled_locked(&fence->base)) dma_fence_set_error(&fence->base, error); if (nouveau_fence_signal(fence)) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9ae2cee1c7c580..67e3c99de73ae6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -87,9 +87,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem) return; } - if (gem->import_attach) - drm_prime_gem_destroy(gem, nvbo->bo.sg); - ttm_bo_put(&nvbo->bo); pm_runtime_mark_last_busy(dev); diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 7d68a8acfe2ea4..eb0f8373258c34 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -129,11 +129,11 @@ static int jadard_unprepare(struct drm_panel *panel) { struct jadard *jadard = panel_to_jadard(panel); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(120); if (jadard->desc->reset_before_power_off_vcioo) { - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); usleep_range(1000, 2000); } diff --git a/drivers/gpu/drm/panel/panel-samsung-sofef00.c b/drivers/gpu/drm/panel/panel-samsung-sofef00.c index 04ce925b3d9dbd..49cfa84b34f0ca 100644 --- a/drivers/gpu/drm/panel/panel-samsung-sofef00.c +++ b/drivers/gpu/drm/panel/panel-samsung-sofef00.c @@ -22,7 +22,6 @@ struct sofef00_panel { struct mipi_dsi_device *dsi; struct regulator *supply; struct 
gpio_desc *reset_gpio; - const struct drm_display_mode *mode; }; static inline @@ -159,26 +158,11 @@ static const struct drm_display_mode enchilada_panel_mode = { .height_mm = 145, }; -static const struct drm_display_mode fajita_panel_mode = { - .clock = (1080 + 72 + 16 + 36) * (2340 + 32 + 4 + 18) * 60 / 1000, - .hdisplay = 1080, - .hsync_start = 1080 + 72, - .hsync_end = 1080 + 72 + 16, - .htotal = 1080 + 72 + 16 + 36, - .vdisplay = 2340, - .vsync_start = 2340 + 32, - .vsync_end = 2340 + 32 + 4, - .vtotal = 2340 + 32 + 4 + 18, - .width_mm = 68, - .height_mm = 145, -}; - static int sofef00_panel_get_modes(struct drm_panel *panel, struct drm_connector *connector) { struct drm_display_mode *mode; - struct sofef00_panel *ctx = to_sofef00_panel(panel); - mode = drm_mode_duplicate(connector->dev, ctx->mode); + mode = drm_mode_duplicate(connector->dev, &enchilada_panel_mode); if (!mode) return -ENOMEM; @@ -239,13 +223,6 @@ static int sofef00_panel_probe(struct mipi_dsi_device *dsi) if (!ctx) return -ENOMEM; - ctx->mode = of_device_get_match_data(dev); - - if (!ctx->mode) { - dev_err(dev, "Missing device mode\n"); - return -ENODEV; - } - ctx->supply = devm_regulator_get(dev, "vddio"); if (IS_ERR(ctx->supply)) return dev_err_probe(dev, PTR_ERR(ctx->supply), @@ -295,14 +272,7 @@ static void sofef00_panel_remove(struct mipi_dsi_device *dsi) } static const struct of_device_id sofef00_panel_of_match[] = { - { // OnePlus 6 / enchilada - .compatible = "samsung,sofef00", - .data = &enchilada_panel_mode, - }, - { // OnePlus 6T / fajita - .compatible = "samsung,s6e3fc2x01", - .data = &fajita_panel_mode, - }, + { .compatible = "samsung,sofef00" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sofef00_panel_of_match); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 232b03c1a259eb..3aaac96c0bfbf5 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct 
panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; @@ -2198,13 +2199,14 @@ static const struct display_timing evervision_vgg644804_timing = { static const struct panel_desc evervision_vgg644804 = { .timings = &evervision_vgg644804_timing, .num_timings = 1, - .bpc = 8, + .bpc = 6, .size = { .width = 115, .height = 86, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, - .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, }; static const struct display_timing evervision_vgg804821_timing = { diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index a9da1d1eeb7071..1e8811c6716dfa 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -171,10 +171,6 @@ int panthor_device_init(struct panthor_device *ptdev) struct page *p; int ret; - ret = panthor_gpu_coherency_init(ptdev); - if (ret) - return ret; - 
init_completion(&ptdev->unplug.done); ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); if (ret) @@ -247,6 +243,10 @@ int panthor_device_init(struct panthor_device *ptdev) if (ret) goto err_rpm_put; + ret = panthor_gpu_coherency_init(ptdev); + if (ret) + goto err_unplug_gpu; + ret = panthor_mmu_init(ptdev); if (ret) goto err_unplug_gpu; diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 12a02e28f50fd8..7cca97d298ea10 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -781,6 +781,7 @@ int panthor_vm_active(struct panthor_vm *vm) if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); + ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as); gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); } diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h index b7b3b3add16627..a7a323dc5cf92a 100644 --- a/drivers/gpu/drm/panthor/panthor_regs.h +++ b/drivers/gpu/drm/panthor/panthor_regs.h @@ -133,8 +133,8 @@ #define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) #define GPU_COHERENCY_PROTOCOL 0x304 -#define GPU_COHERENCY_ACE 0 -#define GPU_COHERENCY_ACE_LITE 1 +#define GPU_COHERENCY_ACE_LITE 0 +#define GPU_COHERENCY_ACE 1 #define GPU_COHERENCY_NONE 31 #define MCU_CONTROL 0x700 diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c index 70d8ad065bfa1d..4c8fe83dd6101b 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c @@ -705,7 +705,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) ret = of_parse_phandle_with_fixed_args(np, vsps_prop_name, cells, i, &args); if (ret < 0) - goto error; + goto done; /* * Add the VSP to the list or update the 
corresponding existing @@ -743,13 +743,11 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) vsp->dev = rcdu; ret = rcar_du_vsp_init(vsp, vsps[i].np, vsps[i].crtcs_mask); - if (ret < 0) - goto error; + if (ret) + goto done; } - return 0; - -error: +done: for (i = 0; i < ARRAY_SIZE(vsps); ++i) of_node_put(vsps[i].np); diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index 3d1dddb346035c..7d531b6f4c098c 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -94,6 +94,7 @@ struct rockchip_hdmi_qp { struct gpio_desc *enable_gpio; struct delayed_work hpd_work; int port_id; + const struct rockchip_hdmi_qp_ctrl_ops *ctrl_ops; }; struct rockchip_hdmi_qp_ctrl_ops { @@ -461,6 +462,7 @@ static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, return -ENODEV; } + hdmi->ctrl_ops = cfg->ctrl_ops; hdmi->dev = &pdev->dev; hdmi->port_id = -ENODEV; @@ -600,27 +602,8 @@ static void dw_hdmi_qp_rockchip_remove(struct platform_device *pdev) static int __maybe_unused dw_hdmi_qp_rockchip_resume(struct device *dev) { struct rockchip_hdmi_qp *hdmi = dev_get_drvdata(dev); - u32 val; - val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) | - HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | - HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | - HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); - regmap_write(hdmi->vo_regmap, - hdmi->port_id ? 
RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, - val); - - val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, - RK3588_SET_HPD_PATH_MASK); - regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); - - if (hdmi->port_id) - val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, - RK3588_HDMI1_GRANT_SEL); - else - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, - RK3588_HDMI0_GRANT_SEL); - regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); + hdmi->ctrl_ops->io_init(hdmi); dw_hdmi_qp_resume(dev, hdmi->hdmi); diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index 14958d6b3d2e70..0a2840cbe8e22d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -1754,9 +1754,9 @@ static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags); break; case ROCKCHIP_VOP2_EP_DP1: - die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; - die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | - FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id); + die &= ~RK3588_SYS_DSP_INFACE_EN_DP1_MUX; + die |= RK3588_SYS_DSP_INFACE_EN_DP1 | + FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP1_MUX, vp->id); dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL; dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags); break; diff --git a/drivers/gpu/drm/scheduler/.kunitconfig b/drivers/gpu/drm/scheduler/.kunitconfig new file mode 100644 index 00000000000000..cece53609fcf66 --- /dev/null +++ b/drivers/gpu/drm/scheduler/.kunitconfig @@ -0,0 +1,12 @@ +CONFIG_KUNIT=y +CONFIG_DRM=y +CONFIG_DRM_SCHED_KUNIT_TEST=y +CONFIG_EXPERT=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_LOCK_DEBUGGING_SUPPORT=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_LIST=y diff --git a/drivers/gpu/drm/scheduler/Makefile b/drivers/gpu/drm/scheduler/Makefile index 53863621829f14..74e75eff6df50c 100644 --- 
a/drivers/gpu/drm/scheduler/Makefile +++ b/drivers/gpu/drm/scheduler/Makefile @@ -20,6 +20,8 @@ # OTHER DEALINGS IN THE SOFTWARE. # # -gpu-sched-y := sched_main.o sched_fence.o sched_entity.o +gpu-sched-y := sched_main.o sched_fence.o sched_entity.o sched_rq.o obj-$(CONFIG_DRM_SCHED) += gpu-sched.o + +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += tests/ diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index bd39db7bb24087..0b871c611456a7 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -32,6 +32,29 @@ #include "gpu_scheduler_trace.h" + +void drm_sched_entity_stats_release(struct kref *kref) +{ + struct drm_sched_entity_stats *stats = + container_of(kref, typeof(*stats), kref); + + kfree(stats); +} + +static struct drm_sched_entity_stats *drm_sched_entity_stats_alloc(void) +{ + struct drm_sched_entity_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) + return NULL; + + kref_init(&stats->kref); + spin_lock_init(&stats->lock); + + return stats; +} + /** * drm_sched_entity_init - Init a context entity used by scheduler when * submit to HW ring. @@ -65,6 +88,11 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, return -EINVAL; memset(entity, 0, sizeof(struct drm_sched_entity)); + + entity->stats = drm_sched_entity_stats_alloc(); + if (!entity->stats) + return -ENOMEM; + INIT_LIST_HEAD(&entity->list); entity->rq = NULL; entity->guilty = guilty; @@ -76,29 +104,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, * is initialized itself. */ entity->sched_list = num_sched_list > 1 ? sched_list : NULL; + if (num_sched_list) { + entity->sched_list = num_sched_list > 1 ? 
sched_list : NULL; + entity->rq = &sched_list[0]->rq; + } RCU_INIT_POINTER(entity->last_scheduled, NULL); RB_CLEAR_NODE(&entity->rb_tree_node); - - if (num_sched_list && !sched_list[0]->sched_rq) { - /* Since every entry covered by num_sched_list - * should be non-NULL and therefore we warn drivers - * not to do this and to fix their DRM calling order. - */ - pr_warn("%s: called with uninitialized scheduler\n", __func__); - } else if (num_sched_list) { - /* The "priority" of an entity cannot exceed the number of run-queues of a - * scheduler. Protect against num_rqs being 0, by converting to signed. Choose - * the lowest priority available. - */ - if (entity->priority >= sched_list[0]->num_rqs) { - dev_err(sched_list[0]->dev, "entity has out-of-bounds priority: %u. num_rqs: %u\n", - entity->priority, sched_list[0]->num_rqs); - entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1, - (s32) DRM_SCHED_PRIORITY_KERNEL); - } - entity->rq = sched_list[0]->sched_rq[entity->priority]; - } - init_completion(&entity->entity_idle); /* We start in an idle state. 
*/ @@ -176,6 +187,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); + drm_sched_fence_scheduled(job->s_fence, NULL); drm_sched_fence_finished(job->s_fence, -ESRCH); WARN_ON(job->s_fence->parent); job->sched->ops->free_job(job); @@ -283,7 +295,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) if (!entity->rq) return 0; - sched = entity->rq->sched; + sched = container_of(entity->rq, typeof(*sched), rq); /** * The client will not queue more IBs during this fini, consume existing * queued IBs or discard them on SIGKILL @@ -337,6 +349,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity) dma_fence_put(rcu_dereference_check(entity->last_scheduled, true)); RCU_INIT_POINTER(entity->last_scheduled, NULL); + drm_sched_entity_stats_put(entity->stats); } EXPORT_SYMBOL(drm_sched_entity_fini); @@ -374,9 +387,11 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, { struct drm_sched_entity *entity = container_of(cb, struct drm_sched_entity, cb); + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(sched); } /** @@ -402,7 +417,8 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); */ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) { - struct drm_gpu_scheduler *sched = entity->rq->sched; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); struct dma_fence *fence = entity->dependency; struct drm_sched_fence *s_fence; @@ -500,26 +516,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) spsc_queue_pop(&entity->job_queue); - /* - * Update the entity's location in the min heap according to - * the timestamp of the next job, if any. 
- */ - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { - struct drm_sched_job *next; - - next = drm_sched_entity_queue_peek(entity); - if (next) { - struct drm_sched_rq *rq; - - spin_lock(&entity->lock); - rq = entity->rq; - spin_lock(&rq->lock); - drm_sched_rq_update_fifo_locked(entity, rq, - next->submit_ts); - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - } - } + drm_sched_rq_pop_entity(entity); /* Jobs and entities might have different lifecycles. Since we're * removing the job from the entities queue, set the jobs entity pointer @@ -560,7 +557,7 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) spin_lock(&entity->lock); sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); - rq = sched ? sched->sched_rq[entity->priority] : NULL; + rq = sched ? &sched->rq : NULL; if (rq != entity->rq) { drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; @@ -583,48 +580,25 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) void drm_sched_entity_push_job(struct drm_sched_job *sched_job) { struct drm_sched_entity *entity = sched_job->entity; + struct drm_gpu_scheduler *sched = + container_of(entity->rq, typeof(*sched), rq); bool first; - ktime_t submit_ts; trace_drm_sched_job(sched_job, entity); - atomic_inc(entity->rq->sched->score); + atomic_inc(sched->score); WRITE_ONCE(entity->last_user, current->group_leader); /* * After the sched_job is pushed into the entity queue, it may be * completed and freed up at any time. We can no longer access it. - * Make sure to set the submit_ts first, to avoid a race. 
*/ - sched_job->submit_ts = submit_ts = ktime_get(); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); /* first job wakes up scheduler */ if (first) { - struct drm_gpu_scheduler *sched; - struct drm_sched_rq *rq; - - /* Add the entity to the run queue */ - spin_lock(&entity->lock); - if (entity->stopped) { - spin_unlock(&entity->lock); - - DRM_ERROR("Trying to push to a killed entity\n"); - return; - } - - rq = entity->rq; - sched = rq->sched; - - spin_lock(&rq->lock); - drm_sched_rq_add_entity(rq, entity); - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); - - spin_unlock(&rq->lock); - spin_unlock(&entity->lock); - - drm_sched_wakeup(sched); + sched = drm_sched_rq_add_entity(entity); + if (sched) + drm_sched_wakeup(sched); } } EXPORT_SYMBOL(drm_sched_entity_push_job); diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index e971528504a53c..bb48e690862d24 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -225,7 +225,7 @@ void drm_sched_fence_init(struct drm_sched_fence *fence, { unsigned seq; - fence->sched = entity->rq->sched; + fence->sched = container_of(entity->rq, typeof(*fence->sched), rq); seq = atomic_inc_return(&entity->fence_seq); dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled, &fence->lock, entity->fence_context, seq); diff --git a/drivers/gpu/drm/scheduler/sched_internal.h b/drivers/gpu/drm/scheduler/sched_internal.h index 599cf6e1bb7400..8d4bfe499b8445 100644 --- a/drivers/gpu/drm/scheduler/sched_internal.h +++ b/drivers/gpu/drm/scheduler/sched_internal.h @@ -3,22 +3,28 @@ #ifndef _DRM_GPU_SCHEDULER_INTERNAL_H_ #define _DRM_GPU_SCHEDULER_INTERNAL_H_ - -/* Used to choose between FIFO and RR job-scheduling */ -extern int drm_sched_policy; - -#define DRM_SCHED_POLICY_RR 0 -#define DRM_SCHED_POLICY_FIFO 1 +#include +#include +#include + +struct drm_sched_entity_stats { 
+ struct kref kref; + spinlock_t lock; + ktime_t runtime; + ktime_t prev_runtime; + u64 vruntime; +}; void drm_sched_wakeup(struct drm_gpu_scheduler *sched); -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity); +void drm_sched_rq_init(struct drm_gpu_scheduler *sched); +struct drm_sched_entity * +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched); +struct drm_gpu_scheduler * +drm_sched_rq_add_entity(struct drm_sched_entity *entity); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); - -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq, ktime_t ts); +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity); void drm_sched_entity_select_rq(struct drm_sched_entity *entity); struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity); @@ -88,4 +94,98 @@ drm_sched_entity_is_ready(struct drm_sched_entity *entity) return true; } +void drm_sched_entity_stats_release(struct kref *kref); + +static inline struct drm_sched_entity_stats * +drm_sched_entity_stats_get(struct drm_sched_entity_stats *stats) +{ + kref_get(&stats->kref); + + return stats; +} + +static inline void +drm_sched_entity_stats_put(struct drm_sched_entity_stats *stats) +{ + kref_put(&stats->kref, drm_sched_entity_stats_release); +} + +static inline void +drm_sched_entity_stats_job_add_gpu_time(struct drm_sched_job *job) +{ + struct drm_sched_entity_stats *stats = job->entity_stats; + struct drm_sched_fence *s_fence = job->s_fence; + ktime_t start, end; + + start = dma_fence_timestamp(&s_fence->scheduled); + end = dma_fence_timestamp(&s_fence->finished); + + spin_lock(&stats->lock); + stats->runtime = ktime_add(stats->runtime, ktime_sub(end, start)); + spin_unlock(&stats->lock); +} + +static inline void +drm_sched_entity_save_vruntime(struct drm_sched_entity *entity, + ktime_t min_vruntime) +{ + struct drm_sched_entity_stats *stats = entity->stats; + 
ktime_t vruntime; + + spin_lock(&stats->lock); + vruntime = stats->vruntime; + if (min_vruntime && vruntime > min_vruntime) + vruntime = ktime_sub(vruntime, min_vruntime); + else + vruntime = 0; + stats->vruntime = vruntime; + spin_unlock(&stats->lock); +} + +static inline ktime_t +drm_sched_entity_restore_vruntime(struct drm_sched_entity *entity, + ktime_t min_vruntime) +{ + struct drm_sched_entity_stats *stats = entity->stats; + ktime_t vruntime; + + spin_lock(&stats->lock); + vruntime = ktime_add(min_vruntime, stats->vruntime); + stats->vruntime = vruntime; + spin_unlock(&stats->lock); + + return vruntime; +} + +static inline ktime_t +drm_sched_entity_update_vruntime(struct drm_sched_entity *entity) +{ + static const unsigned int shift[] = { + [DRM_SCHED_PRIORITY_KERNEL] = 1, + [DRM_SCHED_PRIORITY_HIGH] = 2, + [DRM_SCHED_PRIORITY_NORMAL] = 4, + [DRM_SCHED_PRIORITY_LOW] = 7, + }; + struct drm_sched_entity_stats *stats = entity->stats; + ktime_t runtime, prev; + + spin_lock(&stats->lock); + prev = stats->prev_runtime; + runtime = stats->runtime; + stats->prev_runtime = runtime; + runtime = ktime_add_ns(stats->vruntime, + ktime_to_ns(ktime_sub(runtime, prev)) << + shift[entity->priority]); + stats->vruntime = runtime; + spin_unlock(&stats->lock); + + return runtime; +} + +static inline ktime_t +drm_sched_entity_get_job_ts(struct drm_sched_entity *entity) +{ + return drm_sched_entity_update_vruntime(entity); +} + #endif diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index bfea608a7106e2..7e98ce17211403 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -83,21 +83,6 @@ #define CREATE_TRACE_POINTS #include "gpu_scheduler_trace.h" -#ifdef CONFIG_LOCKDEP -static struct lockdep_map drm_sched_lockdep_map = { - .name = "drm_sched_lockdep_map" -}; -#endif - -int drm_sched_policy = DRM_SCHED_POLICY_FIFO; - -/** - * DOC: sched_policy (int) - * Used to override default entities 
scheduling policy in a run queue. - */ -MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); -module_param_named(sched_policy, drm_sched_policy, int, 0444); - static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) { u32 credits; @@ -109,248 +94,6 @@ static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) return credits; } -/** - * drm_sched_can_queue -- Can we queue more to the hardware? - * @sched: scheduler instance - * @entity: the scheduler entity - * - * Return true if we can push at least one more job from @entity, false - * otherwise. - */ -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) -{ - struct drm_sched_job *s_job; - - s_job = drm_sched_entity_queue_peek(entity); - if (!s_job) - return false; - - /* If a job exceeds the credit limit, truncate it to the credit limit - * itself to guarantee forward progress. 
- */ - if (s_job->credits > sched->credit_limit) { - dev_WARN(sched->dev, - "Jobs may not exceed the credit limit, truncate.\n"); - s_job->credits = sched->credit_limit; - } - - return drm_sched_available_credits(sched) >= s_job->credits; -} - -static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, - const struct rb_node *b) -{ - struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node); - struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node); - - return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); -} - -static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq) -{ - if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { - rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); - RB_CLEAR_NODE(&entity->rb_tree_node); - } -} - -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, - struct drm_sched_rq *rq, - ktime_t ts) -{ - /* - * Both locks need to be grabbed, one to protect from entity->rq change - * for entity from within concurrent drm_sched_entity_select_rq and the - * other to update the rb tree structure. - */ - lockdep_assert_held(&entity->lock); - lockdep_assert_held(&rq->lock); - - drm_sched_rq_remove_fifo_locked(entity, rq); - - entity->oldest_job_waiting = ts; - - rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, - drm_sched_entity_compare_before); -} - -/** - * drm_sched_rq_init - initialize a given run queue struct - * - * @sched: scheduler instance to associate with this run queue - * @rq: scheduler run queue - * - * Initializes a scheduler runqueue. 
- */ -static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - spin_lock_init(&rq->lock); - INIT_LIST_HEAD(&rq->entities); - rq->rb_tree_root = RB_ROOT_CACHED; - rq->current_entity = NULL; - rq->sched = sched; -} - -/** - * drm_sched_rq_add_entity - add an entity - * - * @rq: scheduler run queue - * @entity: scheduler entity - * - * Adds a scheduler entity to the run queue. - */ -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity) -{ - lockdep_assert_held(&entity->lock); - lockdep_assert_held(&rq->lock); - - if (!list_empty(&entity->list)) - return; - - atomic_inc(rq->sched->score); - list_add_tail(&entity->list, &rq->entities); -} - -/** - * drm_sched_rq_remove_entity - remove an entity - * - * @rq: scheduler run queue - * @entity: scheduler entity - * - * Removes a scheduler entity from the run queue. - */ -void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity) -{ - lockdep_assert_held(&entity->lock); - - if (list_empty(&entity->list)) - return; - - spin_lock(&rq->lock); - - atomic_dec(rq->sched->score); - list_del_init(&entity->list); - - if (rq->current_entity == entity) - rq->current_entity = NULL; - - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_remove_fifo_locked(entity, rq); - - spin_unlock(&rq->lock); -} - -/** - * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run - * - * @sched: the gpu scheduler - * @rq: scheduler run queue to check. - * - * Try to find the next ready entity. - * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. 
- */ -static struct drm_sched_entity * -drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - struct drm_sched_entity *entity; - - spin_lock(&rq->lock); - - entity = rq->current_entity; - if (entity) { - list_for_each_entry_continue(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current - * entity in terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } - } - } - - list_for_each_entry(entity, &rq->entities, list) { - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - rq->current_entity = entity; - reinit_completion(&entity->entity_idle); - spin_unlock(&rq->lock); - return entity; - } - - if (entity == rq->current_entity) - break; - } - - spin_unlock(&rq->lock); - - return NULL; -} - -/** - * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run - * - * @sched: the gpu scheduler - * @rq: scheduler run queue to check. - * - * Find oldest waiting ready entity. - * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. 
- */ -static struct drm_sched_entity * -drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, - struct drm_sched_rq *rq) -{ - struct rb_node *rb; - - spin_lock(&rq->lock); - for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { - struct drm_sched_entity *entity; - - entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); - if (drm_sched_entity_is_ready(entity)) { - /* If we can't queue yet, preserve the current entity in - * terms of fairness. - */ - if (!drm_sched_can_queue(sched, entity)) { - spin_unlock(&rq->lock); - return ERR_PTR(-ENOSPC); - } - - reinit_completion(&entity->entity_idle); - break; - } - } - spin_unlock(&rq->lock); - - return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; -} - /** * drm_sched_run_job_queue - enqueue run-job work * @sched: scheduler instance @@ -371,22 +114,6 @@ static void __drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) queue_work(sched->submit_wq, &sched->work_free_job); } -/** - * drm_sched_run_free_queue - enqueue free-job work if ready - * @sched: scheduler instance - */ -static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_job *job; - - spin_lock(&sched->job_list_lock); - job = list_first_entry_or_null(&sched->pending_list, - struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) - __drm_sched_run_free_queue(sched); - spin_unlock(&sched->job_list_lock); -} - /** * drm_sched_job_done - complete a job * @s_job: pointer to the job which is done @@ -538,17 +265,15 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) static void drm_sched_job_timedout(struct work_struct *work) { - struct drm_gpu_scheduler *sched; + struct drm_gpu_scheduler *sched = + container_of(work, struct drm_gpu_scheduler, work_tdr.work); + enum drm_gpu_sched_stat status; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; - - sched = container_of(work, struct 
drm_gpu_scheduler, work_tdr.work); /* Protects against concurrent deletion in drm_sched_get_finished_job */ spin_lock(&sched->job_list_lock); job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job) { /* * Remove the bad job so it cannot be freed by concurrent @@ -556,20 +281,21 @@ static void drm_sched_job_timedout(struct work_struct *work) * is parked at which point it's safe. */ list_del_init(&job->list); - spin_unlock(&sched->job_list_lock); + } + spin_unlock(&sched->job_list_lock); - status = job->sched->ops->timedout_job(job); + if (!job) + return; - /* - * Guilty job did complete and hence needs to be manually removed - * See drm_sched_stop doc. - */ - if (sched->free_guilty) { - job->sched->ops->free_job(job); - sched->free_guilty = false; - } - } else { - spin_unlock(&sched->job_list_lock); + status = job->sched->ops->timedout_job(job); + + /* + * Guilty job did complete and hence needs to be manually removed. See + * documentation for drm_sched_stop. + */ + if (sched->free_guilty) { + job->sched->ops->free_job(job); + sched->free_guilty = false; } if (status != DRM_GPU_SCHED_STAT_ENODEV) @@ -828,11 +554,15 @@ EXPORT_SYMBOL(drm_sched_job_init); * * This arms a scheduler job for execution. Specifically it initializes the * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv - * or other places that need to track the completion of this job. + * or other places that need to track the completion of this job. It also + * initializes sequence numbers, which are fundamental for fence ordering. * * Refer to drm_sched_entity_push_job() documentation for locking * considerations. * + * Once this function was called, you *must* submit @job with + * drm_sched_entity_push_job(). + * * This can only be called if drm_sched_job_init() succeeded. 
*/ void drm_sched_job_arm(struct drm_sched_job *job) @@ -842,10 +572,11 @@ void drm_sched_job_arm(struct drm_sched_job *job) BUG_ON(!entity); drm_sched_entity_select_rq(entity); - sched = entity->rq->sched; + sched = container_of(entity->rq, typeof(*sched), rq); job->sched = sched; job->s_priority = entity->priority; + job->entity_stats = drm_sched_entity_stats_get(entity->stats); job->id = atomic64_inc_return(&sched->job_id_count); drm_sched_fence_init(job->s_fence, job->entity); @@ -1015,13 +746,14 @@ EXPORT_SYMBOL(drm_sched_job_has_dependency); * Cleans up the resources allocated with drm_sched_job_init(). * * Drivers should call this from their error unwind code if @job is aborted - * before it was submitted to an entity with drm_sched_entity_push_job(). + * before drm_sched_job_arm() is called. * - * Since calling drm_sched_job_arm() causes the job's fences to be initialized, - * it is up to the driver to ensure that fences that were exposed to external - * parties get signaled. drm_sched_job_cleanup() does not ensure this. + * drm_sched_job_arm() is a point of no return since it initializes the fences + * and their sequence number etc. Once that function has been called, you *must* + * submit it with drm_sched_entity_push_job() and cannot simply abort it by + * calling drm_sched_job_cleanup(). * - * This function must also be called in &struct drm_sched_backend_ops.free_job + * This function should be called in the &drm_sched_backend_ops.free_job callback. */ void drm_sched_job_cleanup(struct drm_sched_job *job) { @@ -1029,10 +761,16 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) unsigned long index; if (kref_read(&job->s_fence->finished.refcount)) { - /* drm_sched_job_arm() has been called */ + /* The job has been processed by the scheduler, i.e., + * drm_sched_job_arm() and drm_sched_entity_push_job() have + * been called. 
+ */ dma_fence_put(&job->s_fence->finished); + drm_sched_entity_stats_put(job->entity_stats); } else { - /* aborted job before arming */ + /* The job was aborted before it has been committed to be run; + * notably, drm_sched_job_arm() has not been called. + */ drm_sched_fence_free(job->s_fence); } @@ -1057,36 +795,6 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched) drm_sched_run_job_queue(sched); } -/** - * drm_sched_select_entity - Select next entity to process - * - * @sched: scheduler instance - * - * Return an entity to process or NULL if none are found. - * - * Note, that we break out of the for-loop when "entity" is non-null, which can - * also be an error-pointer--this assures we don't process lower priority - * run-queues. See comments in the respectively called functions. - */ -static struct drm_sched_entity * -drm_sched_select_entity(struct drm_gpu_scheduler *sched) -{ - struct drm_sched_entity *entity; - int i; - - /* Start with the highest priority. - */ - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? - drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : - drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); - if (entity) - break; - } - - return IS_ERR(entity) ? 
NULL : entity; -} - /** * drm_sched_get_finished_job - fetch the next finished job to be destroyed * @@ -1104,22 +812,22 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched) job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); - if (job && dma_fence_is_signaled(&job->s_fence->finished)) { /* remove job from pending_list */ list_del_init(&job->list); /* cancel this job's TO timer */ cancel_delayed_work(&sched->work_tdr); - /* make the scheduled timestamp more accurate */ + next = list_first_entry_or_null(&sched->pending_list, typeof(*next), list); - if (next) { + /* make the scheduled timestamp more accurate */ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &next->s_fence->scheduled.flags)) next->s_fence->scheduled.timestamp = dma_fence_timestamp(&job->s_fence->finished); + /* start TO timer for next job */ drm_sched_start_timeout(sched); } @@ -1179,11 +887,11 @@ static void drm_sched_free_job_work(struct work_struct *w) container_of(w, struct drm_gpu_scheduler, work_free_job); struct drm_sched_job *job; - job = drm_sched_get_finished_job(sched); - if (job) + while ((job = drm_sched_get_finished_job(sched))) { + drm_sched_entity_stats_job_add_gpu_time(job); sched->ops->free_job(job); + } - drm_sched_run_free_queue(sched); drm_sched_run_job_queue(sched); } @@ -1196,51 +904,91 @@ static void drm_sched_run_job_work(struct work_struct *w) { struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_run_job); + u32 job_credits, submitted_credits = 0; struct drm_sched_entity *entity; - struct dma_fence *fence; struct drm_sched_fence *s_fence; struct drm_sched_job *sched_job; - int r; + struct dma_fence *fence; - /* Find entity with a ready job */ - entity = drm_sched_select_entity(sched); - if (!entity) - return; /* No more work */ + while (!READ_ONCE(sched->pause_submit)) { + /* Find entity with a ready job */ + entity = drm_sched_rq_select_entity(sched); + if (!entity) + break; /* No more work */ - sched_job = 
drm_sched_entity_pop_job(entity); - if (!sched_job) { - complete_all(&entity->entity_idle); - drm_sched_run_job_queue(sched); - return; - } + /* + * If a job exceeds the credit limit truncate it to guarantee + * forward progress. + */ + sched_job = drm_sched_entity_queue_peek(entity); + job_credits = sched_job->credits; + if (dev_WARN_ONCE(sched->dev, job_credits > sched->credit_limit, + "Jobs may not exceed the credit limit, truncating.\n")) + job_credits = sched_job->credits = sched->credit_limit; + + if (job_credits > drm_sched_available_credits(sched)) { + complete_all(&entity->entity_idle); + break; + } - s_fence = sched_job->s_fence; + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) { + /* Top entity is not yet runnable after all */ + complete_all(&entity->entity_idle); + continue; + } - atomic_add(sched_job->credits, &sched->credit_count); - drm_sched_job_begin(sched_job); + s_fence = sched_job->s_fence; + drm_sched_job_begin(sched_job); + trace_drm_run_job(sched_job, entity); + submitted_credits += job_credits; + atomic_add(job_credits, &sched->credit_count); - trace_drm_run_job(sched_job, entity); - fence = sched->ops->run_job(sched_job); - complete_all(&entity->entity_idle); - drm_sched_fence_scheduled(s_fence, fence); + fence = sched->ops->run_job(sched_job); + drm_sched_fence_scheduled(s_fence, fence); - if (!IS_ERR_OR_NULL(fence)) { - /* Drop for original kref_init of the fence */ - dma_fence_put(fence); + if (!IS_ERR_OR_NULL(fence)) { + int r; - r = dma_fence_add_callback(fence, &sched_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(sched_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); - } else { - drm_sched_job_done(sched_job, IS_ERR(fence) ? 
- PTR_ERR(fence) : 0); + /* Drop for original kref_init of the fence */ + dma_fence_put(fence); + + r = dma_fence_add_callback(fence, &sched_job->cb, + drm_sched_job_done_cb); + if (r == -ENOENT) + drm_sched_job_done(sched_job, fence->error); + else if (r) + DRM_DEV_ERROR(sched->dev, + "fence add callback failed (%d)\n", r); + } else { + drm_sched_job_done(sched_job, IS_ERR(fence) ? + PTR_ERR(fence) : 0); + } + + complete_all(&entity->entity_idle); } - wake_up(&sched->job_scheduled); - drm_sched_run_job_queue(sched); + if (submitted_credits) + wake_up(&sched->job_scheduled); +} + +static struct workqueue_struct *drm_sched_alloc_wq(const char *name) +{ +#if (IS_ENABLED(CONFIG_LOCKDEP)) + static struct lockdep_map map = { + .name = "drm_sched_lockdep_map" + }; + + /* + * Avoid leaking a lockdep map on each drm sched creation and + * destruction by using a single lockdep map for all drm sched + * allocated submit_wq. + */ + + return alloc_ordered_workqueue_lockdep_map(name, WQ_MEM_RECLAIM, &map); +#else + return alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); +#endif } /** @@ -1253,8 +1001,6 @@ static void drm_sched_run_job_work(struct work_struct *w) */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_args *args) { - int i; - sched->ops = args->ops; sched->credit_limit = args->credit_limit; sched->name = args->name; @@ -1264,49 +1010,18 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->score = args->score ? args->score : &sched->_score; sched->dev = args->dev; - if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) { - /* This is a gross violation--tell drivers what the problem is. - */ - dev_err(sched->dev, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n", - __func__); - return -EINVAL; - } else if (sched->sched_rq) { - /* Not an error, but warn anyway so drivers can - * fine-tune their DRM calling order, and return all - * is good. 
- */ - dev_warn(sched->dev, "%s: scheduler already initialized!\n", __func__); - return 0; - } - if (args->submit_wq) { sched->submit_wq = args->submit_wq; sched->own_submit_wq = false; } else { -#ifdef CONFIG_LOCKDEP - sched->submit_wq = alloc_ordered_workqueue_lockdep_map(args->name, - WQ_MEM_RECLAIM, - &drm_sched_lockdep_map); -#else - sched->submit_wq = alloc_ordered_workqueue(args->name, WQ_MEM_RECLAIM); -#endif + sched->submit_wq = drm_sched_alloc_wq(args->name); if (!sched->submit_wq) return -ENOMEM; sched->own_submit_wq = true; } - sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq), - GFP_KERNEL | __GFP_ZERO); - if (!sched->sched_rq) - goto Out_check_own; - sched->num_rqs = args->num_rqs; - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); - if (!sched->sched_rq[i]) - goto Out_unroll; - drm_sched_rq_init(sched, sched->sched_rq[i]); - } + drm_sched_rq_init(sched); init_waitqueue_head(&sched->job_scheduled); INIT_LIST_HEAD(&sched->pending_list); @@ -1321,17 +1036,6 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ sched->ready = true; return 0; -Out_unroll: - for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) - kfree(sched->sched_rq[i]); - - kfree(sched->sched_rq); - sched->sched_rq = NULL; -Out_check_own: - if (sched->own_submit_wq) - destroy_workqueue(sched->submit_wq); - dev_err(sched->dev, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); - return -ENOMEM; } EXPORT_SYMBOL(drm_sched_init); @@ -1358,25 +1062,21 @@ EXPORT_SYMBOL(drm_sched_init); */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { + + struct drm_sched_rq *rq = &sched->rq; struct drm_sched_entity *s_entity; - int i; drm_sched_wqueue_stop(sched); - for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - - spin_lock(&rq->lock); - list_for_each_entry(s_entity, &rq->entities, list) - 
/* - * Prevents reinsertion and marks job_queue as idle, - * it will be removed from the rq in drm_sched_entity_fini() - * eventually - */ - s_entity->stopped = true; - spin_unlock(&rq->lock); - kfree(sched->sched_rq[i]); - } + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) + /* + * Prevents reinsertion and marks job_queue as idle, + * it will be removed from the rq in drm_sched_entity_fini() + * eventually + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ wake_up_all(&sched->job_scheduled); @@ -1387,8 +1087,6 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; - kfree(sched->sched_rq); - sched->sched_rq = NULL; } EXPORT_SYMBOL(drm_sched_fini); @@ -1403,35 +1101,28 @@ EXPORT_SYMBOL(drm_sched_fini); */ void drm_sched_increase_karma(struct drm_sched_job *bad) { - int i; - struct drm_sched_entity *tmp; - struct drm_sched_entity *entity; struct drm_gpu_scheduler *sched = bad->sched; + struct drm_sched_entity *entity, *tmp; + struct drm_sched_rq *rq = &sched->rq; /* don't change @bad's karma if it's from KERNEL RQ, * because sometimes GPU hang would cause kernel jobs (like VM updating jobs) * corrupt but keep in mind that kernel jobs always considered good. 
*/ - if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { - atomic_inc(&bad->karma); - - for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { - struct drm_sched_rq *rq = sched->sched_rq[i]; - - spin_lock(&rq->lock); - list_for_each_entry_safe(entity, tmp, &rq->entities, list) { - if (bad->s_fence->scheduled.context == - entity->fence_context) { - if (entity->guilty) - atomic_set(entity->guilty, 1); - break; - } - } - spin_unlock(&rq->lock); - if (&entity->list != &rq->entities) - break; + if (bad->s_priority == DRM_SCHED_PRIORITY_KERNEL) + return; + + atomic_inc(&bad->karma); + + spin_lock(&rq->lock); + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { + if (bad->s_fence->scheduled.context == entity->fence_context) { + if (entity->guilty) + atomic_set(entity->guilty, 1); + break; } } + spin_unlock(&rq->lock); } EXPORT_SYMBOL(drm_sched_increase_karma); diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c new file mode 100644 index 00000000000000..f2f10f7d6ddf9c --- /dev/null +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -0,0 +1,214 @@ +#include + +#include +#include + +#include "sched_internal.h" + +static __always_inline bool +drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b) +{ + struct drm_sched_entity *ea = + rb_entry((a), struct drm_sched_entity, rb_tree_node); + struct drm_sched_entity *eb = + rb_entry((b), struct drm_sched_entity, rb_tree_node); + + return ktime_before(ea->oldest_job_waiting, eb->oldest_job_waiting); +} + +static void drm_sched_rq_remove_tree_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq) +{ + lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); + + if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { + rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); + RB_CLEAR_NODE(&entity->rb_tree_node); + } +} + +static void drm_sched_rq_update_tree_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, + ktime_t ts) +{ + /* + 
* Both locks need to be grabbed, one to protect from entity->rq change + * for entity from within concurrent drm_sched_entity_select_rq and the + * other to update the rb tree structure. + */ + lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); + + drm_sched_rq_remove_tree_locked(entity, rq); + + entity->oldest_job_waiting = ts; + + rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, + drm_sched_entity_compare_before); +} + +/** + * drm_sched_rq_init - initialize a given run queue struct + * + * @sched: scheduler instance to associate with this run queue + * + * Initializes a scheduler runqueue. + */ +void drm_sched_rq_init(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_rq *rq = &sched->rq; + + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->entities); + rq->rb_tree_root = RB_ROOT_CACHED; +} + +static ktime_t +drm_sched_rq_get_min_vruntime(struct drm_sched_rq *rq) +{ + struct drm_sched_entity *entity; + struct rb_node *rb; + + lockdep_assert_held(&rq->lock); + + for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { + entity = rb_entry(rb, typeof(*entity), rb_tree_node); + + return entity->stats->vruntime; /* Unlocked read */ + } + + return 0; +} + +/** + * drm_sched_rq_add_entity - add an entity + * + * @entity: scheduler entity + * @ts: submission timestamp + * + * Adds a scheduler entity to the run queue. + * + * Returns a DRM scheduler pre-selected to handle this entity. 
+ */ +struct drm_gpu_scheduler * +drm_sched_rq_add_entity(struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler *sched; + struct drm_sched_rq *rq; + ktime_t ts; + + /* Add the entity to the run queue */ + spin_lock(&entity->lock); + if (entity->stopped) { + spin_unlock(&entity->lock); + + DRM_ERROR("Trying to push to a killed entity\n"); + return NULL; + } + + rq = entity->rq; + sched = container_of(rq, typeof(*sched), rq); + spin_lock(&rq->lock); + + if (list_empty(&entity->list)) { + atomic_inc(sched->score); + list_add_tail(&entity->list, &rq->entities); + } + + ts = drm_sched_rq_get_min_vruntime(rq); + ts = drm_sched_entity_restore_vruntime(entity, ts); + drm_sched_rq_update_tree_locked(entity, rq, ts); + + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); + + return sched; +} + +/** + * drm_sched_rq_remove_entity - remove an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Removes a scheduler entity from the run queue. + */ +void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, + struct drm_sched_entity *entity) +{ + struct drm_gpu_scheduler *sched = container_of(rq, typeof(*sched), rq); + + lockdep_assert_held(&entity->lock); + + if (list_empty(&entity->list)) + return; + + spin_lock(&rq->lock); + + atomic_dec(sched->score); + list_del_init(&entity->list); + + drm_sched_rq_remove_tree_locked(entity, rq); + + spin_unlock(&rq->lock); +} + +void drm_sched_rq_pop_entity(struct drm_sched_entity *entity) +{ + struct drm_sched_job *next_job; + struct drm_sched_rq *rq; + + /* + * Update the entity's location in the rb tree according to + * the timestamp of the next job, if any.
+ */ + spin_lock(&entity->lock); + rq = entity->rq; + spin_lock(&rq->lock); + next_job = drm_sched_entity_queue_peek(entity); + if (next_job) { + ktime_t ts; + + ts = drm_sched_entity_get_job_ts(entity); + drm_sched_rq_update_tree_locked(entity, rq, ts); + } else { + ktime_t min_vruntime; + + drm_sched_rq_remove_tree_locked(entity, rq); + min_vruntime = drm_sched_rq_get_min_vruntime(rq); + drm_sched_entity_save_vruntime(entity, min_vruntime); + } + spin_unlock(&rq->lock); + spin_unlock(&entity->lock); +} + +/** + * drm_sched_rq_select_entity - Select an entity which provides a job to run + * + * @sched: the gpu scheduler + * + * Find oldest waiting ready entity. + * + * Return an entity if one is found or NULL if no ready entity was found. + */ +struct drm_sched_entity * +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_rq *rq = &sched->rq; + struct rb_node *rb; + + spin_lock(&rq->lock); + for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { + struct drm_sched_entity *entity; + + entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); + if (drm_sched_entity_is_ready(entity)) { + reinit_completion(&entity->entity_idle); + break; + } + } + spin_unlock(&rq->lock); + + return rb ? 
rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; +} diff --git a/drivers/gpu/drm/scheduler/tests/Makefile b/drivers/gpu/drm/scheduler/tests/Makefile new file mode 100644 index 00000000000000..9ec185fbbc153a --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +drm-sched-tests-y := \ + mock_scheduler.o \ + tests_basic.o \ + tests_scheduler.o + +obj-$(CONFIG_DRM_SCHED_KUNIT_TEST) += drm-sched-tests.o diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c new file mode 100644 index 00000000000000..61efc96e6e41c3 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include "sched_tests.h" + +/* + * Here we implement the mock "GPU" (or the scheduler backend) which is used by + * the DRM scheduler unit tests in order to exercise the core functionality. + * + * Test cases are implemented in a separate file. 
+ */ + +/** + * drm_mock_sched_entity_new - Create a new mock scheduler entity + * + * @test: KUnit test owning the entity + * @priority: Scheduling priority + * @sched: Mock scheduler on which the entity can be scheduled + * + * Returns: New mock scheduler entity with allocation managed by the test + */ +struct drm_mock_sched_entity * +drm_mock_sched_entity_new(struct kunit *test, + enum drm_sched_priority priority, + struct drm_mock_scheduler *sched) +{ + struct drm_mock_sched_entity *entity; + struct drm_gpu_scheduler *drm_sched; + int ret; + + entity = kunit_kzalloc(test, sizeof(*entity), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, entity); + + drm_sched = &sched->base; + ret = drm_sched_entity_init(&entity->base, + priority, + &drm_sched, 1, + NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + entity->test = test; + + return entity; +} + +/** + * drm_mock_sched_entity_free - Destroys a mock scheduler entity + * + * @entity: Entity to destroy + * + * To be used from the test cases once done with the entity. 
+ */ +void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity) +{ + drm_sched_entity_destroy(&entity->base); +} + +static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(job->base.sched); + + lockdep_assert_held(&sched->lock); + + job->flags |= DRM_MOCK_SCHED_JOB_DONE; + list_move_tail(&job->link, &sched->done_list); + dma_fence_signal(&job->hw_fence); + complete(&job->done); +} + +static enum hrtimer_restart +drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer) +{ + struct drm_mock_sched_job *job = + container_of(hrtimer, typeof(*job), timer); + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(job->base.sched); + struct drm_mock_sched_job *next; + ktime_t now = ktime_get(); + unsigned long flags; + LIST_HEAD(signal); + + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->job_list, link) { + if (!job->duration_us) + break; + + if (ktime_before(now, job->finish_at)) + break; + + sched->hw_timeline.cur_seqno = job->hw_fence.seqno; + drm_mock_sched_job_complete(job); + } + spin_unlock_irqrestore(&sched->lock, flags); + + return HRTIMER_NORESTART; +} + +/** + * drm_mock_sched_job_new - Create a new mock scheduler job + * + * @test: KUnit test owning the job + * @entity: Scheduler entity of the job + * + * Returns: New mock scheduler job with allocation managed by the test + */ +struct drm_mock_sched_job * +drm_mock_sched_job_new(struct kunit *test, + struct drm_mock_sched_entity *entity) +{ + struct drm_mock_sched_job *job; + int ret; + + job = kunit_kzalloc(test, sizeof(*job), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, job); + + ret = drm_sched_job_init(&job->base, + &entity->base, + 1, + NULL); + KUNIT_ASSERT_EQ(test, ret, 0); + + job->test = test; + + init_completion(&job->done); + spin_lock_init(&job->lock); + INIT_LIST_HEAD(&job->link); + hrtimer_init(&job->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + 
job->timer.function = drm_mock_sched_job_signal_timer; + + return job; +} + +static const char *drm_mock_sched_hw_fence_driver_name(struct dma_fence *fence) +{ + return "drm_mock_sched"; +} + +static const char * +drm_mock_sched_hw_fence_timeline_name(struct dma_fence *fence) +{ + struct drm_mock_sched_job *job = + container_of(fence, typeof(*job), hw_fence); + + return (const char *)job->base.sched->name; +} + +static void drm_mock_sched_hw_fence_release(struct dma_fence *fence) +{ + struct drm_mock_sched_job *job = + container_of(fence, typeof(*job), hw_fence); + + hrtimer_cancel(&job->timer); + + /* Containing job is freed by the kunit framework */ +} + +static const struct dma_fence_ops drm_mock_sched_hw_fence_ops = { + .get_driver_name = drm_mock_sched_hw_fence_driver_name, + .get_timeline_name = drm_mock_sched_hw_fence_timeline_name, + .release = drm_mock_sched_hw_fence_release, +}; + +static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + + dma_fence_init(&job->hw_fence, + &drm_mock_sched_hw_fence_ops, + &job->lock, + sched->hw_timeline.context, + atomic_inc_return(&sched->hw_timeline.next_seqno)); + + dma_fence_get(&job->hw_fence); /* Reference for the job_list */ + + spin_lock_irq(&sched->lock); + if (job->duration_us) { + ktime_t prev_finish_at = 0; + + if (!list_empty(&sched->job_list)) { + struct drm_mock_sched_job *prev = + list_last_entry(&sched->job_list, typeof(*prev), + link); + + prev_finish_at = prev->finish_at; + } + + if (!prev_finish_at) + prev_finish_at = ktime_get(); + + job->finish_at = ktime_add_us(prev_finish_at, job->duration_us); + } + list_add_tail(&job->link, &sched->job_list); + if (job->finish_at) + hrtimer_start(&job->timer, job->finish_at, HRTIMER_MODE_ABS); + spin_unlock_irq(&sched->lock); + + return &job->hw_fence; +} + +static enum 
drm_gpu_sched_stat +mock_sched_timedout_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + + job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT; + + return DRM_GPU_SCHED_STAT_NOMINAL; +} + +static void mock_sched_free_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = + drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + unsigned long flags; + + /* Remove from the scheduler done list. */ + spin_lock_irqsave(&sched->lock, flags); + list_del(&job->link); + spin_unlock_irqrestore(&sched->lock, flags); + dma_fence_put(&job->hw_fence); + + drm_sched_job_cleanup(sched_job); + + /* Mock job itself is freed by the kunit framework. */ +} + +static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { + .run_job = mock_sched_run_job, + .timedout_job = mock_sched_timedout_job, + .free_job = mock_sched_free_job +}; + +/** + * drm_mock_sched_new - Create a new mock scheduler + * + * @test: KUnit test owning the job + * @timeout: Job timeout to set + * + * Returns: New mock scheduler with allocation managed by the test + */ +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) +{ + struct drm_sched_init_args args = { + .ops = &drm_mock_scheduler_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .credit_limit = U32_MAX, + .hang_limit = 1, + .timeout = timeout, + .name = "drm-mock-scheduler", + }; + struct drm_mock_scheduler *sched; + int ret; + + sched = kunit_kzalloc(test, sizeof(*sched), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, sched); + + ret = drm_sched_init(&sched->base, &args); + KUNIT_ASSERT_EQ(test, ret, 0); + + sched->test = test; + sched->hw_timeline.context = dma_fence_context_alloc(1); + atomic_set(&sched->hw_timeline.next_seqno, 0); + INIT_LIST_HEAD(&sched->job_list); + INIT_LIST_HEAD(&sched->done_list); + spin_lock_init(&sched->lock); + + return sched; +} + +/** + * drm_mock_sched_fini - 
Destroys a mock scheduler + * + * @sched: Scheduler to destroy + * + * To be used from the test cases once done with the scheduler. + */ +void drm_mock_sched_fini(struct drm_mock_scheduler *sched) +{ + struct drm_mock_sched_job *job, *next; + unsigned long flags; + LIST_HEAD(list); + + drm_sched_wqueue_stop(&sched->base); + + /* Force complete all unfinished jobs. */ + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->job_list, link) + list_move_tail(&job->link, &list); + spin_unlock_irqrestore(&sched->lock, flags); + + list_for_each_entry(job, &list, link) + hrtimer_cancel(&job->timer); + + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &list, link) + drm_mock_sched_job_complete(job); + spin_unlock_irqrestore(&sched->lock, flags); + + /* + * Free completed jobs and jobs not yet processed by the DRM scheduler + * free worker. + */ + spin_lock_irqsave(&sched->lock, flags); + list_for_each_entry_safe(job, next, &sched->done_list, link) + list_move_tail(&job->link, &list); + spin_unlock_irqrestore(&sched->lock, flags); + + list_for_each_entry_safe(job, next, &list, link) + mock_sched_free_job(&job->base); + + drm_sched_fini(&sched->base); +} + +/** + * drm_mock_sched_advance - Advances the mock scheduler timeline + * + * @sched: Scheduler timeline to advance + * @num: By how many jobs to advance + * + * Advancing the scheduler timeline by a number of seqnos will trigger + * signalling of the hardware fences and unlinking the jobs from the internal + * scheduler tracking. + * + * This can be used from test cases which want complete control of the simulated + * job execution timing. For example, submitting one job with no set duration + * would never complete it before the test case advances the timeline by one.
+ */ +unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, + unsigned int num) +{ + struct drm_mock_sched_job *job, *next; + unsigned int found = 0; + unsigned long flags; + LIST_HEAD(signal); + + spin_lock_irqsave(&sched->lock, flags); + if (WARN_ON_ONCE(sched->hw_timeline.cur_seqno + num < + sched->hw_timeline.cur_seqno)) + goto unlock; + sched->hw_timeline.cur_seqno += num; + list_for_each_entry_safe(job, next, &sched->job_list, link) { + if (sched->hw_timeline.cur_seqno < job->hw_fence.seqno) + break; + + drm_mock_sched_job_complete(job); + found++; + } +unlock: + spin_unlock_irqrestore(&sched->lock, flags); + + return found; +} + +MODULE_DESCRIPTION("DRM mock scheduler and tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h new file mode 100644 index 00000000000000..27caf8285fb74b --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Valve Corporation */ + +#ifndef _SCHED_TESTS_H_ +#define _SCHED_TESTS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * DOC: Mock DRM scheduler data structures + * + * drm_mock_* data structures are used to implement a mock "GPU". + * + * They subclass the core DRM scheduler objects and add their data on top, which + * enables tracking the submitted jobs and simulating their execution with the + * attributes as specified by the test case. 
+ */ + +/** + * struct drm_mock_scheduler - implements a trivial mock GPU execution engine + * + * @base: DRM scheduler base class + * @test: Backpointer to the owning kunit test case + * @lock: Lock to protect the simulated @hw_timeline, @job_list and @done_list + * @job_list: List of jobs submitted to the mock GPU + * @done_list: List of jobs completed by the mock GPU + * @hw_timeline: Simulated hardware timeline has a @context, @next_seqno and + * @cur_seqno for implementing a struct dma_fence signaling the + * simulated job completion. + * + * Trivial mock GPU execution engine tracks submitted jobs and enables + * completing them strictly in submission order. + */ +struct drm_mock_scheduler { + struct drm_gpu_scheduler base; + + struct kunit *test; + + spinlock_t lock; + struct list_head job_list; + struct list_head done_list; + + struct { + u64 context; + atomic_t next_seqno; + unsigned int cur_seqno; + } hw_timeline; +}; + +/** + * struct drm_mock_sched_entity - implements a mock GPU sched entity + * + * @base: DRM scheduler entity base class + * @test: Backpointer to the owning kunit test case + * + * Mock GPU sched entity is used by the test cases to submit jobs to the mock + * scheduler. + */ +struct drm_mock_sched_entity { + struct drm_sched_entity base; + + struct kunit *test; +}; + +/** + * struct drm_mock_sched_job - implements a mock GPU job + * + * @base: DRM sched job base class + * @done: Completion signaling job completion. + * @flags: Flags designating job state.
+ * @link: List head element used for job tracking by the drm_mock_scheduler + * @timer: Timer used for simulating job execution duration + * @duration_us: Simulated job duration in microseconds, or zero if in manual + * timeline advance mode + * @finish_at: Absolute time when the jobs with set duration will complete + * @lock: Lock used for @hw_fence + * @hw_fence: Fence returned to DRM scheduler as the hardware fence + * @test: Backpointer to the owning kunit test case + * + * Mock GPU sched job is used by the test cases to submit jobs to the mock + * scheduler. + */ +struct drm_mock_sched_job { + struct drm_sched_job base; + + struct completion done; + +#define DRM_MOCK_SCHED_JOB_DONE 0x1 +#define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 + unsigned long flags; + + struct list_head link; + struct hrtimer timer; + + unsigned int duration_us; + ktime_t finish_at; + + spinlock_t lock; + struct dma_fence hw_fence; + + struct kunit *test; +}; + +static inline struct drm_mock_scheduler * +drm_sched_to_mock_sched(struct drm_gpu_scheduler *sched) +{ + return container_of(sched, struct drm_mock_scheduler, base); +}; + +static inline struct drm_mock_sched_entity * +drm_sched_entity_to_mock_entity(struct drm_sched_entity *sched_entity) +{ + return container_of(sched_entity, struct drm_mock_sched_entity, base); +}; + +static inline struct drm_mock_sched_job * +drm_sched_job_to_mock_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct drm_mock_sched_job, base); +}; + +struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, + long timeout); +void drm_mock_sched_fini(struct drm_mock_scheduler *sched); +unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched, + unsigned int num); + +struct drm_mock_sched_entity * +drm_mock_sched_entity_new(struct kunit *test, + enum drm_sched_priority priority, + struct drm_mock_scheduler *sched); +void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity); + +struct drm_mock_sched_job *
+drm_mock_sched_job_new(struct kunit *test, + struct drm_mock_sched_entity *entity); + +/** + * drm_mock_sched_job_submit - Arm and submit a job in one go + * + * @job: Job to arm and submit + */ +static inline void drm_mock_sched_job_submit(struct drm_mock_sched_job *job) +{ + drm_sched_job_arm(&job->base); + drm_sched_entity_push_job(&job->base); +} + +/** + * drm_mock_sched_job_set_duration_us - Set a job duration + * + * @job: Job to set the duration for + * @duration_us: Duration in microseconds + * + * Jobs with duration set will be automatically completed by the mock scheduler + * as the timeline progresses, unless a job without a set duration is + * encountered in the timeline in which case calling drm_mock_sched_advance() + * will be required to bump the timeline. + */ +static inline void +drm_mock_sched_job_set_duration_us(struct drm_mock_sched_job *job, + unsigned int duration_us) +{ + job->duration_us = duration_us; +} + +/** + * drm_mock_sched_job_is_finished - Check if a job is finished + * + * @job: Job to check + * + * Returns: true if finished + */ +static inline bool +drm_mock_sched_job_is_finished(struct drm_mock_sched_job *job) +{ + return job->flags & DRM_MOCK_SCHED_JOB_DONE; +} + +/** + * drm_mock_sched_job_wait_finished - Wait until a job is finished + * + * @job: Job to wait for + * @timeout: Wait time in jiffies + * + * Returns: true if finished within the timeout provided, otherwise false + */ +static inline bool +drm_mock_sched_job_wait_finished(struct drm_mock_sched_job *job, long timeout) +{ + if (job->flags & DRM_MOCK_SCHED_JOB_DONE) + return true; + + return wait_for_completion_timeout(&job->done, timeout) != 0; +} + +/** + * drm_mock_sched_job_wait_scheduled - Wait until a job is scheduled + * + * @job: Job to wait for + * @timeout: Wait time in jiffies + * + * Returns: true if scheduled within the timeout provided, otherwise false + */ +static inline bool +drm_mock_sched_job_wait_scheduled(struct drm_mock_sched_job *job, long
timeout) +{ + KUNIT_ASSERT_EQ(job->test, job->flags & DRM_MOCK_SCHED_JOB_DONE, 0); + + return dma_fence_wait_timeout(&job->base.s_fence->scheduled, + false, + timeout) != 0; +} + +#endif diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c new file mode 100644 index 00000000000000..7230057e0594c6 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include + +#include "sched_tests.h" + +/* + * DRM scheduler basic tests should check the basic functional correctness of + * the scheduler, including some very light smoke testing. More targeted tests, + * for example focusing on testing specific bugs and other more complicated test + * scenarios, should be implemented in separate source units. + */ + +static int drm_sched_basic_init(struct kunit *test) +{ + test->priv = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + + return 0; +} + +static void drm_sched_basic_exit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + + drm_mock_sched_fini(sched); +} + +static int drm_sched_timeout_init(struct kunit *test) +{ + test->priv = drm_mock_sched_new(test, HZ); + + return 0; +} + +static void drm_sched_basic_submit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + unsigned int i; + bool done; + + /* + * Submit one job to the scheduler and verify that it gets scheduled + * and completed only when the mock hw backend processes it. 
+ */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, HZ / 2); + KUNIT_ASSERT_FALSE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); +} + +struct drm_sched_basic_params { + const char *description; + unsigned int queue_depth; + unsigned int num_entities; + unsigned int job_us; + bool dep_chain; +}; + +static const struct drm_sched_basic_params drm_sched_basic_cases[] = { + { + .description = "A queue of jobs in a single entity", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 1, + }, + { + .description = "A chain of dependent jobs across multiple entities", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 1, + .dep_chain = true, + }, + { + .description = "Multiple independent job queues", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 4, + }, + { + .description = "Multiple inter-dependent job queues", + .queue_depth = 100, + .job_us = 1000, + .num_entities = 4, + .dep_chain = true, + }, +}; + +static void +drm_sched_basic_desc(const struct drm_sched_basic_params *params, char *desc) +{ + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(drm_sched_basic, drm_sched_basic_cases, drm_sched_basic_desc); + +static void drm_sched_basic_test(struct kunit *test) +{ + const struct drm_sched_basic_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_job *job, *prev = NULL; + struct drm_mock_sched_entity **entity; + unsigned int i, cur_ent = 0; + bool done; + + entity = kunit_kcalloc(test, params->num_entities, sizeof(*entity), + GFP_KERNEL); + 
KUNIT_ASSERT_NOT_NULL(test, entity); + + for (i = 0; i < params->num_entities; i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < params->queue_depth; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= params->num_entities; + drm_mock_sched_job_set_duration_us(job, params->job_us); + if (params->dep_chain && prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + prev = job; + } + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + for (i = 0; i < params->num_entities; i++) + drm_mock_sched_entity_free(entity[i]); +} + +static void drm_sched_basic_entity_cleanup(struct kunit *test) +{ + struct drm_mock_sched_job *job, *mid, *prev = NULL; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity[4]; + const unsigned int qd = 100; + unsigned int i, cur_ent = 0; + bool done; + + /* + * Submit a queue of jobs across different entities with an explicit + * chain of dependencies between them and trigger entity cleanup while + * the queue is still being processed. + */ + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + if (prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + if (i == qd / 2) + mid = job; + prev = job; + } + + done = drm_mock_sched_job_wait_finished(mid, HZ); + KUNIT_ASSERT_TRUE(test, done); + + /* Exit with half of the queue still pending to be executed. 
*/ + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_basic_tests[] = { + KUNIT_CASE(drm_sched_basic_submit), + KUNIT_CASE_PARAM(drm_sched_basic_test, drm_sched_basic_gen_params), + KUNIT_CASE(drm_sched_basic_entity_cleanup), + {} +}; + +static struct kunit_suite drm_sched_basic = { + .name = "drm_sched_basic_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_basic_tests, +}; + +static void drm_sched_basic_timeout(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + bool done; + + /* + * Submit a single job against a scheduler with the timeout configured + * and verify that the timeout handling will run if the backend fails + * to complete it in time. + */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, HZ / 2); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + 0); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, + DRM_MOCK_SCHED_JOB_TIMEDOUT); + + drm_mock_sched_entity_free(entity); +} + +static struct kunit_case drm_sched_timeout_tests[] = { + KUNIT_CASE(drm_sched_basic_timeout), + {} +}; + +static struct kunit_suite drm_sched_timeout = { + .name = "drm_sched_basic_timeout_tests", + .init = drm_sched_timeout_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_timeout_tests, +}; + +static void drm_sched_priorities(struct kunit *test) +{ + struct drm_mock_sched_entity *entity[DRM_SCHED_PRIORITY_COUNT]; + struct drm_mock_scheduler 
*sched = test->priv; + struct drm_mock_sched_job *job; + const unsigned int qd = 100; + unsigned int i, cur_ent = 0; + enum drm_sched_priority p; + bool done; + + /* + * Submit a bunch of jobs against entities configured with different + * priorities. + */ + + BUILD_BUG_ON(DRM_SCHED_PRIORITY_KERNEL > DRM_SCHED_PRIORITY_LOW); + BUILD_BUG_ON(ARRAY_SIZE(entity) != DRM_SCHED_PRIORITY_COUNT); + + for (p = DRM_SCHED_PRIORITY_KERNEL; p <= DRM_SCHED_PRIORITY_LOW; p++) + entity[p] = drm_mock_sched_entity_new(test, p, sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static void drm_sched_change_priority(struct kunit *test) +{ + struct drm_mock_sched_entity *entity[DRM_SCHED_PRIORITY_COUNT]; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_job *job; + const unsigned int qd = 1000; + unsigned int i, cur_ent = 0; + enum drm_sched_priority p; + + /* + * Submit a bunch of jobs against entities configured with different + * priorities and while waiting for them to complete, periodically keep + * changing their priorities. + * + * We set up the queue-depth (qd) and job duration so the priority + * changing loop has some time to interact with submissions to the + * backend and job completions as they progress. 
+ */ + + for (p = DRM_SCHED_PRIORITY_KERNEL; p <= DRM_SCHED_PRIORITY_LOW; p++) + entity[p] = drm_mock_sched_entity_new(test, p, sched); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + do { + drm_sched_entity_set_priority(&entity[cur_ent]->base, + (entity[cur_ent]->base.priority + 1) % + DRM_SCHED_PRIORITY_COUNT); + cur_ent++; + cur_ent %= ARRAY_SIZE(entity); + usleep_range(200, 500); + } while (!drm_mock_sched_job_is_finished(job)); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_priority_tests[] = { + KUNIT_CASE(drm_sched_priorities), + KUNIT_CASE(drm_sched_change_priority), + {} +}; + +static struct kunit_suite drm_sched_priority = { + .name = "drm_sched_basic_priority_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_priority_tests, +}; + +static void drm_sched_test_modify_sched(struct kunit *test) +{ + unsigned int i, cur_ent = 0, cur_sched = 0; + struct drm_mock_sched_entity *entity[13]; + struct drm_mock_scheduler *sched[3]; + struct drm_mock_sched_job *job; + const unsigned int qd = 1000; + + /* + * Submit a bunch of jobs against entities configured with different + * schedulers and while waiting for them to complete, periodically keep + * changing schedulers associated with each entity. + * + * We set up the queue-depth (qd) and job duration so the sched modify + * loop has some time to interact with submissions to the backend and + * job completions as they progress. + * + * For the number of schedulers and entities we use primes in order to + * perturb the entity->sched assignments with less of a regular pattern. 
+ */ + + for (i = 0; i < ARRAY_SIZE(sched); i++) + sched[i] = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched[i % ARRAY_SIZE(sched)]); + + for (i = 0; i < qd; i++) { + job = drm_mock_sched_job_new(test, entity[cur_ent++]); + cur_ent %= ARRAY_SIZE(entity); + drm_mock_sched_job_set_duration_us(job, 1000); + drm_mock_sched_job_submit(job); + } + + do { + struct drm_gpu_scheduler *modify; + + usleep_range(200, 500); + cur_ent++; + cur_ent %= ARRAY_SIZE(entity); + cur_sched++; + cur_sched %= ARRAY_SIZE(sched); + modify = &sched[cur_sched]->base; + drm_sched_entity_modify_sched(&entity[cur_ent]->base, &modify, + 1); + } while (!drm_mock_sched_job_is_finished(job)); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); + + for (i = 0; i < ARRAY_SIZE(sched); i++) + drm_mock_sched_fini(sched[i]); +} + +static struct kunit_case drm_sched_modify_sched_tests[] = { + KUNIT_CASE(drm_sched_test_modify_sched), + {} +}; + +static struct kunit_suite drm_sched_modify_sched = { + .name = "drm_sched_basic_modify_sched_tests", + .test_cases = drm_sched_modify_sched_tests, +}; + +static void drm_sched_test_credits(struct kunit *test) +{ + struct drm_mock_sched_entity *entity; + struct drm_mock_scheduler *sched; + struct drm_mock_sched_job *job[2]; + bool done; + int i; + + /* + * Check that the configured credit limit is respected. 
+ */ + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 1; + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + job[0] = drm_mock_sched_job_new(test, entity); + job[1] = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job[0]); + drm_mock_sched_job_submit(job[1]); + + done = drm_mock_sched_job_wait_scheduled(job[0], HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_scheduled(job[1], HZ); + KUNIT_ASSERT_FALSE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_scheduled(job[1], HZ); + KUNIT_ASSERT_TRUE(test, done); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job[1], HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); + drm_mock_sched_fini(sched); +} + +static struct kunit_case drm_sched_credits_tests[] = { + KUNIT_CASE(drm_sched_test_credits), + {} +}; + +static struct kunit_suite drm_sched_credits = { + .name = "drm_sched_basic_credits_tests", + .test_cases = drm_sched_credits_tests, +}; + +kunit_test_suites(&drm_sched_basic, + &drm_sched_timeout, + &drm_sched_priority, + &drm_sched_modify_sched, + &drm_sched_credits); diff --git a/drivers/gpu/drm/scheduler/tests/tests_scheduler.c b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c new file mode 100644 index 00000000000000..d70b47d7bf7a31 --- /dev/null +++ b/drivers/gpu/drm/scheduler/tests/tests_scheduler.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Valve Corporation */ + +#include +#include +#include + +#include "sched_tests.h" + +/* + * DRM scheduler scheduler tests exercise load balancing decisions ie. entity + * selection logic. 
+ */ + +static int drm_sched_scheduler_init(struct kunit *test) +{ + struct drm_mock_scheduler *sched; + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 1; + + test->priv = sched; + + return 0; +} + +static int drm_sched_scheduler_init2(struct kunit *test) +{ + struct drm_mock_scheduler *sched; + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + sched->base.credit_limit = 2; + + test->priv = sched; + + return 0; +} + +static void drm_sched_scheduler_exit(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + + drm_mock_sched_fini(sched); +} + +static void drm_sched_scheduler_queue_overhead(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + const unsigned int job_us = 1000; + const unsigned int jobs = 1000; + const unsigned int total_us = jobs * job_us; + struct drm_mock_sched_job *job, *first; + ktime_t start, end; + bool done; + int i; + + /* + * Deep queue job at a time processing (single credit). + * + * This measures the overhead of picking and processing a job at a time + * by comparing the ideal total "GPU" time of all submitted jobs versus + * the time actually taken. 
+ */ + + KUNIT_ASSERT_EQ(test, sched->base.credit_limit, 1); + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i <= jobs; i++) { + job = drm_mock_sched_job_new(test, entity); + if (i == 0) + first = job; /* Extra first job blocks the queue */ + else + drm_mock_sched_job_set_duration_us(job, job_us); + drm_mock_sched_job_submit(job); + } + + done = drm_mock_sched_job_wait_scheduled(first, HZ); + KUNIT_ASSERT_TRUE(test, done); + + start = ktime_get(); + i = drm_mock_sched_advance(sched, 1); /* Release the queue */ + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, + usecs_to_jiffies(total_us) * 5); + end = ktime_get(); + KUNIT_ASSERT_TRUE(test, done); + + pr_info("Expected %uus, actual %lldus\n", + total_us, + ktime_to_us(ktime_sub(end, start))); + + drm_mock_sched_entity_free(entity); +} + +static void drm_sched_scheduler_ping_pong(struct kunit *test) +{ + struct drm_mock_sched_job *job, *first, *prev = NULL; + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity[2]; + const unsigned int job_us = 1000; + const unsigned int jobs = 1000; + const unsigned int total_us = jobs * job_us; + ktime_t start, end; + bool done; + int i; + + /* + * Two entities in an inter-dependency chain. + * + * This measures the overhead of picking and processing a job at a time, + * where each job depends on the previous one from the different + * entity, by comparing the ideal total "GPU" time of all submitted jobs + * versus the time actually taken. 
+ */ + + KUNIT_ASSERT_EQ(test, sched->base.credit_limit, 1); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + entity[i] = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i <= jobs; i++) { + job = drm_mock_sched_job_new(test, entity[i & 1]); + if (i == 0) + first = job; /* Extra first job blocks the queue */ + else + drm_mock_sched_job_set_duration_us(job, job_us); + if (prev) + drm_sched_job_add_dependency(&job->base, + dma_fence_get(&prev->base.s_fence->finished)); + drm_mock_sched_job_submit(job); + prev = job; + } + + done = drm_mock_sched_job_wait_scheduled(first, HZ); + KUNIT_ASSERT_TRUE(test, done); + + start = ktime_get(); + i = drm_mock_sched_advance(sched, 1); /* Release the queue */ + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, + usecs_to_jiffies(total_us) * 5); + end = ktime_get(); + KUNIT_ASSERT_TRUE(test, done); + + pr_info("Expected %uus, actual %lldus\n", + total_us, + ktime_to_us(ktime_sub(end, start))); + + for (i = 0; i < ARRAY_SIZE(entity); i++) + drm_mock_sched_entity_free(entity[i]); +} + +static struct kunit_case drm_sched_scheduler_overhead_tests[] = { + KUNIT_CASE_SLOW(drm_sched_scheduler_queue_overhead), + KUNIT_CASE_SLOW(drm_sched_scheduler_ping_pong), + {} +}; + +static struct kunit_suite drm_sched_scheduler_overhead = { + .name = "drm_sched_scheduler_overhead_tests", + .init = drm_sched_scheduler_init, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_overhead_tests, +}; + +struct drm_sched_client_params { + enum drm_sched_priority priority; + unsigned int job_cnt; + unsigned int job_us; + unsigned int wait_us; + bool sync; +}; + +struct drm_sched_test_params { + const char *description; + unsigned int num_clients; + struct drm_sched_client_params client[2]; +}; + +static const struct drm_sched_test_params drm_sched_cases[] = { + { + .description = "Normal and normal", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + 
.job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "Normal and low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "High and normal", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "High and low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 8000, + .wait_us = 0, + .sync = false, + }, + }, + { + .description = "50 and 50", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + }, + { + .description = "50 and 50 low", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + }, + { + .description = "50 high and 50", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_HIGH, + .job_cnt = 1, + .job_us = 1500, + .wait_us = 1500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + 
}, + { + .description = "Low hog and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_LOW, + .job_cnt = 3, + .job_us = 2500, + .wait_us = 500, + .sync = false, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 500, + .wait_us = 10000, + .sync = true, + }, + }, + { + .description = "Heavy and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 3, + .job_us = 2500, + .wait_us = 2500, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1000, + .wait_us = 9000, + .sync = true, + }, + }, + { + .description = "Very heavy and interactive", + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 50000, + .wait_us = 1, + .sync = true, + }, + .client[1] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 1, + .job_us = 1000, + .wait_us = 9000, + .sync = true, + }, + }, +}; + +static void +drm_sched_desc(const struct drm_sched_test_params *params, char *desc) +{ + strscpy(desc, params->description, KUNIT_PARAM_DESC_SIZE); +} + +KUNIT_ARRAY_PARAM(drm_sched_scheduler_two_clients, + drm_sched_cases, + drm_sched_desc); + +struct test_client_stats { + unsigned long min_us; + unsigned long max_us; + unsigned long avg_us; +}; + +struct test_client { + struct kunit *test; + + struct drm_mock_sched_entity *entity; + + struct kthread_worker *worker; + struct kthread_work work; + + unsigned int id; + ktime_t duration; + + struct drm_sched_client_params params; + + ktime_t ideal_duration; + unsigned int cycles; + unsigned int cycle; + ktime_t start; + ktime_t end; + bool done; + + struct test_client_stats cycle_time; + struct test_client_stats latency_time; +}; + +static void +update_stats(struct test_client_stats *stats, unsigned int n, unsigned long us) +{ + if (us > stats->max_us) + stats->max_us = us; + if (us < stats->min_us) + stats->min_us = us; + stats->avg_us = DIV_ROUND_UP(n * stats->avg_us + us, n + 
1); +} + +static void drm_sched_client_work(struct kthread_work *work) +{ + struct test_client *client = container_of(work, typeof(*client), work); + const long sync_wait = MAX_SCHEDULE_TIMEOUT; + unsigned int cycle, work_us, period_us; + struct drm_mock_sched_job *job = NULL; + + work_us = client->params.job_cnt * client->params.job_us; + period_us = work_us + client->params.wait_us; + client->cycles = DIV_ROUND_UP(ktime_to_us(client->duration), period_us); + client->ideal_duration = us_to_ktime(client->cycles * period_us); + + client->start = ktime_get(); + + for (cycle = 0; cycle < client->cycles; cycle++) { + unsigned int batch; + unsigned long us; + ktime_t t; + + if (READ_ONCE(client->done)) + break; + + t = ktime_get(); + for (batch = 0; batch < client->params.job_cnt; batch++) { + job = drm_mock_sched_job_new(client->test, + client->entity); + drm_mock_sched_job_set_duration_us(job, + client->params.job_us); + drm_mock_sched_job_submit(job); + } + + if (client->params.sync) + drm_mock_sched_job_wait_finished(job, sync_wait); + + t = ktime_sub(ktime_get(), t); + us = ktime_to_us(t); + update_stats(&client->cycle_time, cycle, us); + if (ktime_to_us(t) >= (long)work_us) + us = ktime_to_us(t) - work_us; + else if (WARN_ON_ONCE(client->params.sync)) + us = 0; + update_stats(&client->latency_time, cycle, us); + WRITE_ONCE(client->cycle, cycle); + + if (READ_ONCE(client->done)) + break; + + if (client->params.wait_us) + fsleep(client->params.wait_us); + else + cond_resched(); + } + + client->done = drm_mock_sched_job_wait_finished(job, sync_wait); + client->end = ktime_get(); +} + +static const char *prio_str(enum drm_sched_priority prio) +{ + switch (prio) { + case DRM_SCHED_PRIORITY_KERNEL: + return "kernel"; + case DRM_SCHED_PRIORITY_LOW: + return "low"; + case DRM_SCHED_PRIORITY_NORMAL: + return "normal"; + case DRM_SCHED_PRIORITY_HIGH: + return "high"; + default: + return "???"; + } +} + +static void drm_sched_scheduler_two_clients_test(struct kunit *test) +{ 
+ const struct drm_sched_test_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + struct test_client client[2] = { }; + unsigned int prev_cycle[2] = { }; + unsigned int i, j; + ktime_t start; + + /* + * Same job stream from two clients. + */ + + for (i = 0; i < ARRAY_SIZE(client); i++) + client[i].entity = + drm_mock_sched_entity_new(test, + params->client[i].priority, + sched); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + client[i].test = test; + client[i].id = i; + client[i].duration = ms_to_ktime(1000); + client[i].params = params->client[i]; + client[i].cycle_time.min_us = ~0UL; + client[i].latency_time.min_us = ~0UL; + client[i].worker = + kthread_create_worker(0, "%s-%u", __func__, i); + if (IS_ERR(client[i].worker)) { + for (j = 0; j < i; j++) + kthread_destroy_worker(client[j].worker); + KUNIT_FAIL(test, "Failed to create worker!\n"); + } + + kthread_init_work(&client[i].work, drm_sched_client_work); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) + kthread_queue_work(client[i].worker, &client[i].work); + + /* + * The clients (workers) can be a mix of async (deep submission queue), + * sync (one job at a time), or something in between. Therefore it is + * difficult to display a single metric representing their progress. + * + * Each struct drm_sched_client_params describes the actual submission + * pattern which happens in the following steps: + * 1. Submit N jobs + * 2. Wait for last submitted job to finish + * 3. Sleep for U micro-seconds + * 4. Goto 1. for C cycles + * + * Where number of cycles is calculated to match the target client + * duration from the respective struct drm_sched_test_params. + * + * To assess scheduling behaviour what we output for both clients is: + * - pct: Percentage progress of the jobs submitted + * - cps: "Cycles" per second (where one cycle is one 1.-4. 
above) + * - qd: Number of outstanding jobs in the client/entity + */ + + start = ktime_get(); + pr_info("%s:\n\t pct1 cps1 qd1; pct2 cps2 qd2\n", + params->description); + while (!READ_ONCE(client[0].done) || !READ_ONCE(client[1].done)) { + unsigned int pct[2], qd[2], cycle[2], cps[2]; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + qd[i] = spsc_queue_count(&client[i].entity->base.job_queue); + cycle[i] = READ_ONCE(client[i].cycle); + cps[i] = DIV_ROUND_UP(1000 * (cycle[i] - prev_cycle[i]), + 100); + if (client[i].cycles) + pct[i] = DIV_ROUND_UP(100 * (1 + cycle[i]), + client[i].cycles); + else + pct[i] = 0; + prev_cycle[i] = cycle[i]; + } + + if (READ_ONCE(client[0].done)) + pr_info("\t+%6lldms: ; %3u %5u %4u\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[1], cps[1], qd[1]); + else if (READ_ONCE(client[1].done)) + pr_info("\t+%6lldms: %3u %5u %4u;\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[0], cps[0], qd[0]); + else + pr_info("\t+%6lldms: %3u %5u %4u; %3u %5u %4u\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + pct[0], cps[0], qd[0], + pct[1], cps[1], qd[1]); + msleep(100); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + kthread_flush_work(&client[i].work); + kthread_destroy_worker(client[i].worker); + } + + for (i = 0; i < ARRAY_SIZE(client); i++) + KUNIT_ASSERT_TRUE(test, client[i].done); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + pr_info(" %u: prio=%s sync=%u elapsed_ms=%lldms (ideal_ms=%lldms) cycle_time(min,avg,max)=%lu,%lu,%lu us latency_time(min,avg,max)=%lu,%lu,%lu us", + i, + prio_str(params->client[i].priority), + params->client[i].sync, + ktime_to_ms(ktime_sub(client[i].end, client[i].start)), + ktime_to_ms(client[i].ideal_duration), + client[i].cycle_time.min_us, + client[i].cycle_time.avg_us, + client[i].cycle_time.max_us, + client[i].latency_time.min_us, + client[i].latency_time.avg_us, + client[i].latency_time.max_us); + drm_mock_sched_entity_free(client[i].entity); + } +} + +static const struct kunit_attributes 
drm_sched_scheduler_two_clients_attr = { + .speed = KUNIT_SPEED_SLOW, +}; + +static struct kunit_case drm_sched_scheduler_two_clients_tests[] = { + KUNIT_CASE_PARAM_ATTR(drm_sched_scheduler_two_clients_test, + drm_sched_scheduler_two_clients_gen_params, + drm_sched_scheduler_two_clients_attr), + {} +}; + +static struct kunit_suite drm_sched_scheduler_two_clients1 = { + .name = "drm_sched_scheduler_two_clients_one_credit_tests", + .init = drm_sched_scheduler_init, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_two_clients_tests, +}; + +static struct kunit_suite drm_sched_scheduler_two_clients2 = { + .name = "drm_sched_scheduler_two_clients_two_credits_tests", + .init = drm_sched_scheduler_init2, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_two_clients_tests, +}; + + +static const struct drm_sched_test_params drm_sched_many_cases[] = { + { + .description = "2 clients", + .num_clients = 2, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "3 clients", + .num_clients = 3, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "7 clients", + .num_clients = 7, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "13 clients", + .num_clients = 13, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 4, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, + { + .description = "31 clients", + .num_clients = 31, + .client[0] = { + .priority = DRM_SCHED_PRIORITY_NORMAL, + .job_cnt = 2, + .job_us = 1000, + .wait_us = 0, + .sync = true, + }, + }, +}; + +KUNIT_ARRAY_PARAM(drm_sched_scheduler_many_clients, + drm_sched_many_cases, + drm_sched_desc); + +static void 
drm_sched_scheduler_many_clients_test(struct kunit *test) +{ + const struct drm_sched_test_params *params = test->param_value; + struct drm_mock_scheduler *sched = test->priv; + const unsigned int clients = params->num_clients; + unsigned int i, j, delta_total = 0, loops = 0; + struct test_client *client; + unsigned int *prev_cycle; + ktime_t start; + char *buf; + + /* + * Many clients with deep-ish async queues. + */ + + buf = kunit_kmalloc(test, PAGE_SIZE, GFP_KERNEL); + client = kunit_kcalloc(test, clients, sizeof(*client), GFP_KERNEL); + prev_cycle = kunit_kcalloc(test, clients, sizeof(*prev_cycle), + GFP_KERNEL); + + for (i = 0; i < clients; i++) + client[i].entity = + drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + + for (i = 0; i < clients; i++) { + client[i].test = test; + client[i].id = i; + client[i].params = params->client[0]; + client[i].duration = ms_to_ktime(1000 / clients); + client[i].cycle_time.min_us = ~0UL; + client[i].latency_time.min_us = ~0UL; + client[i].worker = + kthread_create_worker(0, "%s-%u", __func__, i); + if (IS_ERR(client[i].worker)) { + for (j = 0; j < i; j++) + kthread_destroy_worker(client[j].worker); + KUNIT_FAIL(test, "Failed to create worker!\n"); + } + + kthread_init_work(&client[i].work, drm_sched_client_work); + } + + for (i = 0; i < clients; i++) + kthread_queue_work(client[i].worker, &client[i].work); + + start = ktime_get(); + pr_info("%u clients:\n\tt\t\tcycle:\t min avg max : ...\n", clients); + for (;;) { + unsigned int min = ~0; + unsigned int max = 0; + unsigned int total = 0; + bool done = true; + char pbuf[16]; + + memset(buf, 0, PAGE_SIZE); + for (i = 0; i < clients; i++) { + unsigned int cycle, cycles; + + cycle = READ_ONCE(client[i].cycle); + cycles = READ_ONCE(client[i].cycles); + + snprintf(pbuf, sizeof(pbuf), " %3d", cycle); + strncat(buf, pbuf, PAGE_SIZE); + + total += cycle; + if (cycle < min) + min = cycle; + if (cycle > max) + max = cycle; + + if (!min || (cycle + 1) < cycles) + 
done = false; + } + + loops++; + delta_total += max - min; + + pr_info("\t+%6lldms\t\t %3u %3u %3u :%s\n", + ktime_to_ms(ktime_sub(ktime_get(), start)), + min, DIV_ROUND_UP(total, clients), max, buf); + + if (done) + break; + + msleep(100); + } + + pr_info(" avg_max_min_delta(x100)=%u\n", + loops ? DIV_ROUND_UP(delta_total * 100, loops) : 0); + + for (i = 0; i < clients; i++) { + kthread_flush_work(&client[i].work); + kthread_destroy_worker(client[i].worker); + } + + for (i = 0; i < clients; i++) + drm_mock_sched_entity_free(client[i].entity); +} + +static const struct kunit_attributes drm_sched_scheduler_many_clients_attr = { + .speed = KUNIT_SPEED_SLOW, +}; + +static struct kunit_case drm_sched_scheduler_many_clients_tests[] = { + KUNIT_CASE_PARAM_ATTR(drm_sched_scheduler_many_clients_test, + drm_sched_scheduler_many_clients_gen_params, + drm_sched_scheduler_many_clients_attr), + {} +}; + +static struct kunit_suite drm_sched_scheduler_many_clients = { + .name = "drm_sched_scheduler_many_clients_tests", + .init = drm_sched_scheduler_init2, + .exit = drm_sched_scheduler_exit, + .test_cases = drm_sched_scheduler_many_clients_tests, +}; + +kunit_test_suites(&drm_sched_scheduler_overhead, + &drm_sched_scheduler_two_clients1, + &drm_sched_scheduler_two_clients2, + &drm_sched_scheduler_many_clients); diff --git a/drivers/gpu/drm/sti/Makefile b/drivers/gpu/drm/sti/Makefile index f203ac5514ae0b..f778a4eee7c9cf 100644 --- a/drivers/gpu/drm/sti/Makefile +++ b/drivers/gpu/drm/sti/Makefile @@ -7,8 +7,6 @@ sti-drm-y := \ sti_compositor.o \ sti_crtc.o \ sti_plane.o \ - sti_crtc.o \ - sti_plane.o \ sti_hdmi.o \ sti_hdmi_tx3g4c28phy.o \ sti_dvo.o \ diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 1e8ec50b759e46..ff5a749710db3a 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -200,6 +200,11 @@ static const struct drm_encoder_helper_funcs tegra_rgb_encoder_helper_funcs = { .atomic_check = tegra_rgb_encoder_atomic_check, }; 
+static void tegra_dc_of_node_put(void *data) +{ + of_node_put(data); +} + int tegra_dc_rgb_probe(struct tegra_dc *dc) { struct device_node *np; @@ -207,7 +212,14 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc) int err; np = of_get_child_by_name(dc->dev->of_node, "rgb"); - if (!np || !of_device_is_available(np)) + if (!np) + return -ENODEV; + + err = devm_add_action_or_reset(dc->dev, tegra_dc_of_node_put, np); + if (err < 0) + return err; + + if (!of_device_is_available(np)) return -ENODEV; rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL); diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c index 7516f6cb36e4e3..b2fdb1a774fe69 100644 --- a/drivers/gpu/drm/tests/drm_client_modeset_test.c +++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c @@ -95,6 +95,9 @@ static void drm_test_pick_cmdline_res_1920_1080_60(struct kunit *test) expected_mode = drm_mode_find_dmt(priv->drm, 1920, 1080, 60, false); KUNIT_ASSERT_NOT_NULL(test, expected_mode); + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_TRUE(test, drm_mode_parse_command_line_for_connector(cmdline, connector, @@ -129,7 +132,8 @@ static void drm_test_pick_cmdline_named(struct kunit *test) struct drm_device *drm = priv->drm; struct drm_connector *connector = &priv->connector; struct drm_cmdline_mode *cmdline_mode = &connector->cmdline_mode; - const struct drm_display_mode *expected_mode, *mode; + const struct drm_display_mode *mode; + struct drm_display_mode *expected_mode; const char *cmdline = params->cmdline; int ret; @@ -149,6 +153,9 @@ static void drm_test_pick_cmdline_named(struct kunit *test) expected_mode = params->func(drm); KUNIT_ASSERT_NOT_NULL(test, expected_mode); + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected_mode, mode)); } diff --git 
a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c index 59c8408c453c2e..1cfcb597b088b4 100644 --- a/drivers/gpu/drm/tests/drm_cmdline_parser_test.c +++ b/drivers/gpu/drm/tests/drm_cmdline_parser_test.c @@ -7,6 +7,7 @@ #include #include +#include #include static const struct drm_connector no_connector = {}; @@ -955,8 +956,15 @@ struct drm_cmdline_tv_option_test { static void drm_test_cmdline_tv_options(struct kunit *test) { const struct drm_cmdline_tv_option_test *params = test->param_value; - const struct drm_display_mode *expected_mode = params->mode_fn(NULL); + struct drm_display_mode *expected_mode; struct drm_cmdline_mode mode = { }; + int ret; + + expected_mode = params->mode_fn(NULL); + KUNIT_ASSERT_NOT_NULL(test, expected_mode); + + ret = drm_kunit_add_mode_destroy_action(test, expected_mode); + KUNIT_ASSERT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, drm_mode_parse_command_line_for_connector(params->cmdline, &no_connector, &mode)); diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index fd4215e2f982d2..925fbc2cda700a 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -216,6 +216,9 @@ static void drm_gem_shmem_test_get_pages_sgt(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sgt); KUNIT_EXPECT_NULL(test, shmem->sgt); + ret = kunit_add_action_or_reset(test, kfree_wrapper, sgt); + KUNIT_ASSERT_EQ(test, ret, 0); + ret = kunit_add_action_or_reset(test, sg_free_table_wrapper, sgt); KUNIT_ASSERT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c index a4eb68f0decca1..6f6616cf496683 100644 --- a/drivers/gpu/drm/tests/drm_kunit_helpers.c +++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c @@ -278,6 +278,28 @@ static void kunit_action_drm_mode_destroy(void *ptr) drm_mode_destroy(NULL, mode); } +/** + * drm_kunit_add_mode_destroy_action() - Add a 
drm_mode_destroy kunit action + * @test: The test context object + * @mode: The drm_display_mode to destroy eventually + * + * Registers a kunit action that will destroy the drm_display_mode at + * the end of the test. + * + * If an error occurs, the drm_display_mode will be destroyed. + * + * Returns: + * 0 on success, an error code otherwise. + */ +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode) +{ + return kunit_add_action_or_reset(test, + kunit_action_drm_mode_destroy, + mode); +} +EXPORT_SYMBOL_GPL(drm_kunit_add_mode_destroy_action); + /** * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC for a KUnit test * @test: The test context object diff --git a/drivers/gpu/drm/tests/drm_modes_test.c b/drivers/gpu/drm/tests/drm_modes_test.c index 6ed51f99e133c9..f5b20f92df8be7 100644 --- a/drivers/gpu/drm/tests/drm_modes_test.c +++ b/drivers/gpu/drm/tests/drm_modes_test.c @@ -40,6 +40,7 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -47,6 +48,9 @@ static void drm_test_modes_analog_tv_ntsc_480i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 60); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -70,6 +74,7 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_NTSC, @@ -77,9 +82,15 @@ static void drm_test_modes_analog_tv_ntsc_480i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = 
drm_mode_analog_ntsc_480i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } @@ -87,6 +98,7 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -94,6 +106,9 @@ static void drm_test_modes_analog_tv_pal_576i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, drm_mode_vrefresh(mode), 50); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); @@ -117,6 +132,7 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *expected, *mode; + int ret; expected = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_PAL, @@ -124,9 +140,15 @@ static void drm_test_modes_analog_tv_pal_576i_inlined(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, expected); + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); + mode = drm_mode_analog_pal_576i(priv->drm); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected, mode)); } @@ -134,6 +156,7 @@ static void drm_test_modes_analog_tv_mono_576i(struct kunit *test) { struct drm_test_modes_priv *priv = test->priv; struct drm_display_mode *mode; + int ret; mode = drm_analog_tv_mode(priv->drm, DRM_MODE_TV_MODE_MONOCHROME, @@ -141,6 +164,9 @@ static void drm_test_modes_analog_tv_mono_576i(struct kunit *test) true); KUNIT_ASSERT_NOT_NULL(test, mode); + ret = drm_kunit_add_mode_destroy_action(test, mode); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, 
drm_mode_vrefresh(mode), 50); KUNIT_EXPECT_EQ(test, mode->hdisplay, 720); diff --git a/drivers/gpu/drm/tests/drm_probe_helper_test.c b/drivers/gpu/drm/tests/drm_probe_helper_test.c index bc09ff38aca18e..db0e4f5df275e8 100644 --- a/drivers/gpu/drm/tests/drm_probe_helper_test.c +++ b/drivers/gpu/drm/tests/drm_probe_helper_test.c @@ -98,7 +98,7 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) struct drm_connector *connector = &priv->connector; struct drm_cmdline_mode *cmdline = &connector->cmdline_mode; struct drm_display_mode *mode; - const struct drm_display_mode *expected; + struct drm_display_mode *expected; size_t len; int ret; @@ -134,6 +134,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_TRUE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } if (params->num_expected_modes >= 2) { @@ -145,6 +148,9 @@ drm_test_connector_helper_tv_get_modes_check(struct kunit *test) KUNIT_EXPECT_TRUE(test, drm_mode_equal(mode, expected)); KUNIT_EXPECT_FALSE(test, mode->type & DRM_MODE_TYPE_PREFERRED); + + ret = drm_kunit_add_mode_destroy_action(test, expected); + KUNIT_ASSERT_EQ(test, ret, 0); } mutex_unlock(&priv->drm->mode_config.mutex); diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index 0460ecaef4bd98..23914a9f7fd376 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, drm); - drm_client_setup(drm, NULL); + if (bpp == 16) + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565); + else + drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888); return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 93c007f18855d2..9e2d72c447eec9 100644 --- 
a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -7,20 +7,6 @@ #include #include -/* - * Casting from randomized struct file * to struct ttm_backup * is fine since - * struct ttm_backup is never defined nor dereferenced. - */ -static struct file *ttm_backup_to_file(struct ttm_backup *backup) -{ - return (void *)backup; -} - -static struct ttm_backup *ttm_file_to_backup(struct file *file) -{ - return (void *)file; -} - /* * Need to map shmem indices to handle since a handle value * of 0 means error, following the swp_entry_t convention. @@ -40,12 +26,12 @@ static pgoff_t ttm_backup_handle_to_shmem_idx(pgoff_t handle) * @backup: The struct backup pointer used to obtain the handle * @handle: The handle obtained from the @backup_page function. */ -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) +void ttm_backup_drop(struct file *backup, pgoff_t handle) { loff_t start = ttm_backup_handle_to_shmem_idx(handle); start <<= PAGE_SHIFT; - shmem_truncate_range(file_inode(ttm_backup_to_file(backup)), start, + shmem_truncate_range(file_inode(backup), start, start + PAGE_SIZE - 1); } @@ -55,16 +41,15 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * @backup: The struct backup pointer used to back up the page. * @dst: The struct page to copy into. * @handle: The handle returned when the page was backed up. - * @intr: Try to perform waits interruptable or at least killable. + * @intr: Try to perform waits interruptible or at least killable. * * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. 
*/ -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; struct folio *from_folio; pgoff_t idx = ttm_backup_handle_to_shmem_idx(handle); @@ -106,12 +91,11 @@ int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, * the folio size- and usage. */ s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; unsigned long handle = 0; struct folio *to_folio; int ret; @@ -161,9 +145,9 @@ ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, * * After a call to this function, it's illegal to use the @backup pointer. */ -void ttm_backup_fini(struct ttm_backup *backup) +void ttm_backup_fini(struct file *backup) { - fput(ttm_backup_to_file(backup)); + fput(backup); } /** @@ -194,14 +178,10 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail); * * Create a backup utilizing shmem objects. * - * Return: A pointer to a struct ttm_backup on success, + * Return: A pointer to a struct file on success, * an error pointer on error. 
*/ -struct ttm_backup *ttm_backup_shmem_create(loff_t size) +struct file *ttm_backup_shmem_create(loff_t size) { - struct file *filp; - - filp = shmem_file_setup("ttm shmem backup", size, 0); - - return ttm_file_to_backup(filp); + return shmem_file_setup("ttm shmem backup", size, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 95b86003c50ded..5bf3c969907c6a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1093,7 +1093,8 @@ struct ttm_bo_swapout_walk { struct ttm_lru_walk walk; /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ gfp_t gfp_flags; - + /** @hit_low: Whether we should attempt to swap BO's with low watermark threshold */ + /** @evict_low: If we cannot swap a bo when @try_low is false (first pass) */ bool hit_low, evict_low; }; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 83b10706ba896b..c2ea865be65720 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -506,7 +506,7 @@ static void ttm_pool_allocated_page_commit(struct page *allocated, * if successful, populate the page-table and dma-address arrays. 
*/ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, - struct ttm_backup *backup, + struct file *backup, const struct ttm_operation_ctx *ctx, struct ttm_pool_alloc_state *alloc) @@ -655,7 +655,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; pgoff_t i, nr; for (i = start_page; i < end_page; i += nr, pages += nr) { @@ -963,7 +963,7 @@ void ttm_pool_drop_backed_up(struct ttm_tt *tt) long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_backup_flags *flags) { - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; struct page *page; unsigned long handle; gfp_t alloc_gfp; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index df0aa6c4b8b8c1..698cd4bf5e4648 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(ttm_tt_pages_limit); */ int ttm_tt_setup_backup(struct ttm_tt *tt) { - struct ttm_backup *backup = + struct file *backup = ttm_backup_shmem_create(((loff_t)tt->num_pages) << PAGE_SHIFT); if (WARN_ON_ONCE(!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))) diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 76816f2551c100..7e789e181af0ac 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -21,74 +21,74 @@ struct v3d_reg_def { }; static const struct v3d_reg_def v3d_hub_reg_defs[] = { - REGDEF(33, 42, V3D_HUB_AXICFG), - REGDEF(33, 71, V3D_HUB_UIFCFG), - REGDEF(33, 71, V3D_HUB_IDENT0), - REGDEF(33, 71, V3D_HUB_IDENT1), - REGDEF(33, 71, V3D_HUB_IDENT2), - REGDEF(33, 71, V3D_HUB_IDENT3), - REGDEF(33, 71, V3D_HUB_INT_STS), - REGDEF(33, 71, V3D_HUB_INT_MSK_STS), - - REGDEF(33, 71, V3D_MMU_CTL), - REGDEF(33, 71, V3D_MMU_VIO_ADDR), - REGDEF(33, 71, 
V3D_MMU_VIO_ID), - REGDEF(33, 71, V3D_MMU_DEBUG_INFO), - - REGDEF(71, 71, V3D_GMP_STATUS(71)), - REGDEF(71, 71, V3D_GMP_CFG(71)), - REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_HUB_AXICFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_UIFCFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT0), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT1), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT2), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_IDENT3), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_INT_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_HUB_INT_MSK_STS), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_CTL), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_VIO_ADDR), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_VIO_ID), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_MMU_DEBUG_INFO), + + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_STATUS(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_CFG(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_GMP_VIO_ADDR(71)), }; static const struct v3d_reg_def v3d_gca_reg_defs[] = { - REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN), - REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK), + REGDEF(V3D_GEN_33, V3D_GEN_33, V3D_GCA_SAFE_SHUTDOWN), + REGDEF(V3D_GEN_33, V3D_GEN_33, V3D_GCA_SAFE_SHUTDOWN_ACK), }; static const struct v3d_reg_def v3d_core_reg_defs[] = { - REGDEF(33, 71, V3D_CTL_IDENT0), - REGDEF(33, 71, V3D_CTL_IDENT1), - REGDEF(33, 71, V3D_CTL_IDENT2), - REGDEF(33, 71, V3D_CTL_MISCCFG), - REGDEF(33, 71, V3D_CTL_INT_STS), - REGDEF(33, 71, V3D_CTL_INT_MSK_STS), - REGDEF(33, 71, V3D_CLE_CT0CS), - REGDEF(33, 71, V3D_CLE_CT0CA), - REGDEF(33, 71, V3D_CLE_CT0EA), - REGDEF(33, 71, V3D_CLE_CT1CS), - REGDEF(33, 71, V3D_CLE_CT1CA), - REGDEF(33, 71, V3D_CLE_CT1EA), - - REGDEF(33, 71, V3D_PTB_BPCA), - REGDEF(33, 71, V3D_PTB_BPCS), - - REGDEF(33, 42, V3D_GMP_STATUS(33)), - REGDEF(33, 42, V3D_GMP_CFG(33)), - REGDEF(33, 42, V3D_GMP_VIO_ADDR(33)), - - REGDEF(33, 71, V3D_ERR_FDBGO), - REGDEF(33, 71, V3D_ERR_FDBGB), - REGDEF(33, 71, V3D_ERR_FDBGS), - REGDEF(33, 71, V3D_ERR_STAT), + 
REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT0), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT1), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_IDENT2), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_MISCCFG), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_INT_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CTL_INT_MSK_STS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0CS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0CA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT0EA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1CS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1CA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_CLE_CT1EA), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_PTB_BPCA), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_PTB_BPCS), + + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_STATUS(33)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_CFG(33)), + REGDEF(V3D_GEN_33, V3D_GEN_42, V3D_GMP_VIO_ADDR(33)), + + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGO), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGB), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_FDBGS), + REGDEF(V3D_GEN_33, V3D_GEN_71, V3D_ERR_STAT), }; static const struct v3d_reg_def v3d_csd_reg_defs[] = { - REGDEF(41, 71, V3D_CSD_STATUS), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG0(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG1(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG2(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG3(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG4(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG5(41)), - REGDEF(41, 42, V3D_CSD_CURRENT_CFG6(41)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG3(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG4(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG5(71)), - REGDEF(71, 71, V3D_CSD_CURRENT_CFG6(71)), - REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7), + REGDEF(V3D_GEN_41, V3D_GEN_71, V3D_CSD_STATUS), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG0(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG1(41)), + REGDEF(V3D_GEN_41, 
V3D_GEN_42, V3D_CSD_CURRENT_CFG2(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG3(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG4(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG5(41)), + REGDEF(V3D_GEN_41, V3D_GEN_42, V3D_CSD_CURRENT_CFG6(41)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG0(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG1(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG2(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG3(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG4(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG5(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_CSD_CURRENT_CFG6(71)), + REGDEF(V3D_GEN_71, V3D_GEN_71, V3D_V7_CSD_CURRENT_CFG7), }; static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) @@ -164,7 +164,7 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) str_yes_no(ident2 & V3D_HUB_IDENT2_WITH_MMU)); seq_printf(m, "TFU: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TFU)); - if (v3d->ver <= 42) { + if (v3d->ver <= V3D_GEN_42) { seq_printf(m, "TSY: %s\n", str_yes_no(ident1 & V3D_HUB_IDENT1_WITH_TSY)); } @@ -196,11 +196,11 @@ static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) seq_printf(m, " QPUs: %d\n", nslc * qups); seq_printf(m, " Semaphores: %d\n", V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); - if (v3d->ver <= 42) { + if (v3d->ver <= V3D_GEN_42) { seq_printf(m, " BCG int: %d\n", (ident2 & V3D_IDENT2_BCG_INT) != 0); } - if (v3d->ver < 40) { + if (v3d->ver < V3D_GEN_41) { seq_printf(m, " Override TMU: %d\n", (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); } @@ -234,7 +234,7 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) int core = 0; int measure_ms = 1000; - if (v3d->ver >= 40) { + if (v3d->ver >= V3D_GEN_41) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, V3D_SET_FIELD_VER(cycle_count_reg, diff --git a/drivers/gpu/drm/v3d/v3d_drv.c 
b/drivers/gpu/drm/v3d/v3d_drv.c index 852015214e971c..aa68be8fe86b71 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -92,7 +93,7 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_PERFMON: - args->value = (v3d->ver >= 40); + args->value = (v3d->ver >= V3D_GEN_41); return 0; case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: args->value = 1; @@ -254,10 +255,10 @@ static const struct drm_driver v3d_drm_driver = { }; static const struct of_device_id v3d_of_match[] = { - { .compatible = "brcm,2711-v3d" }, - { .compatible = "brcm,2712-v3d" }, - { .compatible = "brcm,7268-v3d" }, - { .compatible = "brcm,7278-v3d" }, + { .compatible = "brcm,2711-v3d", .data = (void *)V3D_GEN_42 }, + { .compatible = "brcm,2712-v3d", .data = (void *)V3D_GEN_71 }, + { .compatible = "brcm,7268-v3d", .data = (void *)V3D_GEN_33 }, + { .compatible = "brcm,7278-v3d", .data = (void *)V3D_GEN_41 }, {}, }; MODULE_DEVICE_TABLE(of, v3d_of_match); @@ -274,6 +275,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct drm_device *drm; struct v3d_dev *v3d; + enum v3d_gen gen; int ret; u32 mmu_debug; u32 ident1, ident3; @@ -287,6 +289,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drm); + gen = (uintptr_t)of_device_get_match_data(dev); + v3d->ver = gen; + ret = map_regs(v3d, &v3d->hub_regs, "hub"); if (ret) return ret; @@ -316,6 +321,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) ident1 = V3D_READ(V3D_HUB_IDENT1); v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); + /* Make sure that the V3D tech version retrieved from the HW is equal + * to the one advertised by the device tree. 
+ */ + WARN_ON(v3d->ver != gen); + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ @@ -340,7 +350,7 @@ static int v3d_platform_drm_probe(struct platform_device *pdev) } } - if (v3d->ver < 41) { + if (v3d->ver < V3D_GEN_41) { ret = map_regs(v3d, &v3d->gca_regs, "gca"); if (ret) goto clk_disable; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 9deaefa0f95b71..de4a9e18f6a903 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -94,11 +94,18 @@ struct v3d_perfmon { u64 values[] __counted_by(ncounters); }; +enum v3d_gen { + V3D_GEN_33 = 33, + V3D_GEN_41 = 41, + V3D_GEN_42 = 42, + V3D_GEN_71 = 71, +}; + struct v3d_dev { struct drm_device drm; /* Short representation (e.g. 33, 41) of the V3D tech version */ - int ver; + enum v3d_gen ver; /* Short representation (e.g. 5, 6) of the V3D tech revision */ int rev; @@ -199,7 +206,7 @@ to_v3d_dev(struct drm_device *dev) static inline bool v3d_has_csd(struct v3d_dev *v3d) { - return v3d->ver >= 41; + return v3d->ver >= V3D_GEN_41; } #define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index b1e681630ded09..1ea6d3832c2212 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -25,7 +25,7 @@ v3d_init_core(struct v3d_dev *v3d, int core) * type. If you want the default behavior, you can still put * "2" in the indirect texture state's output_type field. 
*/ - if (v3d->ver < 40) + if (v3d->ver < V3D_GEN_41) V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); /* Whenever we flush the L2T cache, we always want to flush @@ -58,7 +58,7 @@ v3d_idle_axi(struct v3d_dev *v3d, int core) static void v3d_idle_gca(struct v3d_dev *v3d) { - if (v3d->ver >= 41) + if (v3d->ver >= V3D_GEN_41) return; V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); @@ -132,13 +132,13 @@ v3d_reset(struct v3d_dev *v3d) static void v3d_flush_l3(struct v3d_dev *v3d) { - if (v3d->ver < 41) { + if (v3d->ver < V3D_GEN_41) { u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); - if (v3d->ver < 33) { + if (v3d->ver < V3D_GEN_33) { V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); } @@ -151,7 +151,7 @@ v3d_flush_l3(struct v3d_dev *v3d) static void v3d_invalidate_l2c(struct v3d_dev *v3d, int core) { - if (v3d->ver > 32) + if (v3d->ver >= V3D_GEN_33) return; V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 72b6a119412fa7..2cca5d3a26a22c 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -143,7 +143,7 @@ v3d_irq(int irq, void *arg) /* We shouldn't be triggering these if we have GMP in * always-allowed mode. 
*/ - if (v3d->ver < 71 && (intsts & V3D_INT_GMPV)) + if (v3d->ver < V3D_GEN_71 && (intsts & V3D_INT_GMPV)) dev_err(v3d->drm.dev, "GMP violation\n"); /* V3D 4.2 wires the hub and core IRQs together, so if we & @@ -186,27 +186,59 @@ v3d_hub_irq(int irq, void *arg) u32 axi_id = V3D_READ(V3D_MMU_VIO_ID); u64 vio_addr = ((u64)V3D_READ(V3D_MMU_VIO_ADDR) << (v3d->va_width - 32)); - static const char *const v3d41_axi_ids[] = { - "L2T", - "PTB", - "PSE", - "TLB", - "CLE", - "TFU", - "MMU", - "GMP", + static const struct { + u32 begin; + u32 end; + const char *client; + } v3d41_axi_ids[] = { + {0x00, 0x20, "L2T"}, + {0x20, 0x21, "PTB"}, + {0x40, 0x41, "PSE"}, + {0x60, 0x80, "TLB"}, + {0x80, 0x88, "CLE"}, + {0xA0, 0xA1, "TFU"}, + {0xC0, 0xE0, "MMU"}, + {0xE0, 0xE1, "GMP"}, + }, v3d71_axi_ids[] = { + {0x00, 0x30, "L2T"}, + {0x30, 0x38, "CLE"}, + {0x38, 0x39, "PTB"}, + {0x39, 0x3A, "PSE"}, + {0x3A, 0x3B, "CSD"}, + {0x40, 0x60, "TLB"}, + {0x60, 0x70, "MMU"}, + {0x7C, 0x7E, "TFU"}, + {0x7F, 0x80, "GMP"}, }; const char *client = "?"; V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL)); - if (v3d->ver >= 41) { - axi_id = axi_id >> 5; - if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) - client = v3d41_axi_ids[axi_id]; + if (v3d->ver >= V3D_GEN_71) { + size_t i; + + axi_id = axi_id & 0x7F; + for (i = 0; i < ARRAY_SIZE(v3d71_axi_ids); i++) { + if (axi_id >= v3d71_axi_ids[i].begin && + axi_id < v3d71_axi_ids[i].end) { + client = v3d71_axi_ids[i].client; + break; + } + } + } else if (v3d->ver >= V3D_GEN_41) { + size_t i; + + axi_id = axi_id & 0xFF; + for (i = 0; i < ARRAY_SIZE(v3d41_axi_ids); i++) { + if (axi_id >= v3d41_axi_ids[i].begin && + axi_id < v3d41_axi_ids[i].end) { + client = v3d41_axi_ids[i].client; + break; + } + } } - dev_err(v3d->drm.dev, "MMU error from client %s (%d) at 0x%llx%s%s%s\n", + dev_err(v3d->drm.dev, "MMU error from client %s (0x%x) at 0x%llx%s%s%s\n", client, axi_id, (long long)vio_addr, ((intsts & V3D_HUB_INT_MMU_WRV) ? 
", write violation" : ""), @@ -217,7 +249,7 @@ v3d_hub_irq(int irq, void *arg) status = IRQ_HANDLED; } - if (v3d->ver >= 71 && (intsts & V3D_V7_HUB_INT_GMPV)) { + if (v3d->ver >= V3D_GEN_71 && (intsts & V3D_V7_HUB_INT_GMPV)) { dev_err(v3d->drm.dev, "GMP Violation\n"); status = IRQ_HANDLED; } diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 3ebda2fa46fc47..9a3fe52558746e 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -200,10 +200,10 @@ void v3d_perfmon_init(struct v3d_dev *v3d) const struct v3d_perf_counter_desc *counters = NULL; unsigned int max = 0; - if (v3d->ver >= 71) { + if (v3d->ver >= V3D_GEN_71) { counters = v3d_v71_performance_counters; max = ARRAY_SIZE(v3d_v71_performance_counters); - } else if (v3d->ver >= 42) { + } else if (v3d->ver >= V3D_GEN_42) { counters = v3d_v42_performance_counters; max = ARRAY_SIZE(v3d_v42_performance_counters); } diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 34c42d6e12cde6..35f131a46d0701 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -357,11 +357,11 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); - if (v3d->ver >= 71) + if (v3d->ver >= V3D_GEN_71) V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); - if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { + if (v3d->ver >= V3D_GEN_71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); @@ -412,7 +412,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) * * XXX: Set the CFG7 register */ - if (v3d->ver 
>= 71) + if (v3d->ver >= V3D_GEN_71) V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); /* CFG0 write kicks off the job. */ @@ -428,7 +428,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]); struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect); struct drm_v3d_submit_csd *args = &indirect_csd->job->args; - u32 *wg_counts; + struct v3d_dev *v3d = job->base.v3d; + u32 num_batches, *wg_counts; v3d_get_bo_vaddr(bo); v3d_get_bo_vaddr(indirect); @@ -441,8 +442,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job) args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; - args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) * - (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; + + num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) * + (wg_counts[0] * wg_counts[1] * wg_counts[2]); + + /* V3D 7.1.6 and later don't subtract 1 from the number of batches */ + if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6)) + args->cfg[4] = num_batches - 1; + else + args->cfg[4] = num_batches; + + WARN_ON(args->cfg[4] == ~0); for (int i = 0; i < 3; i++) { /* 0xffffffff indicates that the uniform rewrite is not needed */ @@ -734,11 +744,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. 
- */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -748,9 +763,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -790,11 +812,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. 
*/ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c index e70d7c3076acf1..f0ddc223c1f839 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock_output.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock_output.c @@ -75,24 +75,30 @@ int vc4_mock_atomic_add_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); ret = drm_atomic_set_mode_for_crtc(crtc_state, &default_mode); - KUNIT_EXPECT_EQ(test, ret, 0); + if (ret) + return ret; crtc_state->active = true; @@ -113,26 +119,32 @@ int vc4_mock_atomic_del_output(struct kunit *test, int ret; encoder = vc4_find_encoder_by_type(drm, type); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, encoder); + if (!encoder) + return -ENODEV; crtc = vc4_find_crtc_for_encoder(test, drm, encoder); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc); + if (!crtc) + return -ENODEV; crtc_state = drm_atomic_get_crtc_state(state, crtc); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, crtc_state); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); crtc_state->active = false; ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL); - KUNIT_ASSERT_EQ(test, 
ret, 0); + if (ret) + return ret; output = encoder_to_vc4_dummy_output(encoder); conn = &output->connector; conn_state = drm_atomic_get_connector_state(state, conn); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + if (IS_ERR(conn_state)) + return PTR_ERR(conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, NULL); - KUNIT_ASSERT_EQ(test, ret, 0); + if (ret) + return ret; return 0; } diff --git a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c index 992e8f5c5c6ea8..d1f694029169ad 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c +++ b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c @@ -20,7 +20,6 @@ struct pv_muxing_priv { struct vc4_dev *vc4; - struct drm_atomic_state *state; }; static bool check_fifo_conflict(struct kunit *test, @@ -677,18 +676,41 @@ static void drm_vc4_test_pv_muxing(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; const struct pv_muxing_priv *priv = test->priv; - struct drm_atomic_state *state = priv->state; + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + struct drm_device *drm; + struct vc4_dev *vc4; unsigned int i; int ret; + drm_modeset_acquire_init(&ctx, 0); + + vc4 = priv->vc4; + drm = &vc4->base; + +retry: + state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); for (i = 0; i < params->nencoders; i++) { enum vc4_encoder_type enc_type = params->encoders[i]; ret = vc4_mock_atomic_add_output(test, state, enc_type); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_ASSERT_EQ(test, ret, 0); } ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_EQ(test, ret, 0); KUNIT_EXPECT_TRUE(test, @@ -700,33 +722,61 @@ static void drm_vc4_test_pv_muxing(struct kunit *test) 
KUNIT_EXPECT_TRUE(test, check_channel_for_encoder(test, state, enc_type, params->check_fn)); } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static void drm_vc4_test_pv_muxing_invalid(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; const struct pv_muxing_priv *priv = test->priv; - struct drm_atomic_state *state = priv->state; + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + struct drm_device *drm; + struct vc4_dev *vc4; unsigned int i; int ret; + drm_modeset_acquire_init(&ctx, 0); + + vc4 = priv->vc4; + drm = &vc4->base; + +retry: + state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); + for (i = 0; i < params->nencoders; i++) { enum vc4_encoder_type enc_type = params->encoders[i]; ret = vc4_mock_atomic_add_output(test, state, enc_type); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_ASSERT_EQ(test, ret, 0); } ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_LT(test, ret, 0); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); } static int vc4_pv_muxing_test_init(struct kunit *test) { const struct pv_muxing_param *params = test->param_value; - struct drm_modeset_acquire_ctx ctx; struct pv_muxing_priv *priv; - struct drm_device *drm; struct vc4_dev *vc4; priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); @@ -737,15 +787,6 @@ static int vc4_pv_muxing_test_init(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vc4); priv->vc4 = vc4; - drm_modeset_acquire_init(&ctx, 0); - - drm = &vc4->base; - priv->state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->state); - - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - return 0; } @@ -800,13 +841,26 @@ 
static void drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable(struct kunit *tes drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -823,13 +877,26 @@ static void drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable(struct kunit *tes ret = drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -874,16 +941,35 @@ static void drm_test_vc5_pv_muxing_bugs_stable_fifo(struct kunit *test) drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } 
KUNIT_ASSERT_EQ(test, ret, 0); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -908,13 +994,26 @@ static void drm_test_vc5_pv_muxing_bugs_stable_fifo(struct kunit *test) ret = drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_del_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_hvs_state = vc4_hvs_get_new_global_state(state); @@ -968,25 +1067,50 @@ drm_test_vc5_pv_muxing_bugs_subsequent_crtc_enable_too_many_crtc_state(struct ku drm_modeset_acquire_init(&ctx, 0); drm = &vc4->base; +retry_first: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI0); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_first; + } KUNIT_ASSERT_EQ(test, ret, 0); - ret = 
drm_atomic_helper_swap_state(state, false); KUNIT_ASSERT_EQ(test, ret, 0); +retry_second: state = drm_kunit_helper_atomic_state_alloc(test, drm, &ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); ret = vc4_mock_atomic_add_output(test, state, VC4_ENCODER_TYPE_HDMI1); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); ret = drm_atomic_check_only(state); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(&ctx); + if (!ret) + goto retry_second; + } KUNIT_ASSERT_EQ(test, ret, 0); new_vc4_crtc_state = get_vc4_crtc_state_for_encoder(test, state, diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 37238a12baa58a..176aba27b03d36 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -372,13 +372,13 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi, * the lock for now. */ + drm_atomic_helper_connector_hdmi_hotplug(connector, status); + if (status == connector_status_disconnected) { cec_phys_addr_invalidate(vc4_hdmi->cec_adap); return; } - drm_atomic_helper_connector_hdmi_hotplug(connector, status); - cec_s_phys_addr(vc4_hdmi->cec_adap, connector->display_info.source_physical_address, false); @@ -559,12 +559,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drm_connector_hdmi_audio_init(connector, dev->dev, - &vc4_hdmi_audio_funcs, - 8, false, -1); - if (ret) - return ret; - drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -2274,6 +2268,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } + ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev, + &vc4_hdmi_audio_funcs, 8, false, + -1); + if (ret) + return ret; + dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; diff --git 
a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index 2d88e390feb468..e32e680c71979e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -128,6 +128,14 @@ static void virtio_gpu_remove(struct virtio_device *vdev) drm_dev_put(dev); } +static void virtio_gpu_shutdown(struct virtio_device *vdev) +{ + /* + * drm does its own synchronization on shutdown. + * Do nothing here, opt out of device reset. + */ +} + static void virtio_gpu_config_changed(struct virtio_device *vdev) { struct drm_device *dev = vdev->priv; @@ -162,6 +170,7 @@ static struct virtio_driver virtio_gpu_driver = { .id_table = id_table, .probe = virtio_gpu_probe, .remove = virtio_gpu_remove, + .shutdown = virtio_gpu_shutdown, .config_changed = virtio_gpu_config_changed }; diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index dde8fc1a36896d..90c99d83c4cfd2 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -115,13 +115,14 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, if (!vgdev->has_context_init) virtio_gpu_create_context(obj->dev, file); - objs = virtio_gpu_array_alloc(1); - if (!objs) - return -ENOMEM; - virtio_gpu_array_add_obj(objs, obj); + if (vfpriv->context_created) { + objs = virtio_gpu_array_alloc(1); + if (!objs) + return -ENOMEM; + virtio_gpu_array_add_obj(objs, obj); - if (vfpriv->ctx_id) virtio_gpu_cmd_context_attach_resource(vgdev, vfpriv->ctx_id, objs); + } out_notify: virtio_gpu_notify(vgdev); diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a6f5a78f436a29..87e584add04286 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -366,12 +366,6 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, return 0; obj = new_state->fb->obj[0]; - if (obj->import_attach) { - ret = virtio_gpu_prepare_imported_obj(plane, 
new_state, obj); - if (ret) - return ret; - } - if (bo->dumb || obj->import_attach) { vgplane_st->fence = virtio_gpu_fence_alloc(vgdev, vgdev->fence_drv.context, @@ -380,7 +374,21 @@ static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, return -ENOMEM; } + if (obj->import_attach) { + ret = virtio_gpu_prepare_imported_obj(plane, new_state, obj); + if (ret) + goto err_fence; + } + return 0; + +err_fence: + if (vgplane_st->fence) { + dma_fence_put(&vgplane_st->fence->f); + vgplane_st->fence = NULL; + } + + return ret; } static void virtio_gpu_cleanup_imported_obj(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index fe6a0b01857175..4de2a63ccd1894 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -321,6 +321,7 @@ struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, return ERR_PTR(-ENOMEM); obj = &bo->base.base; + obj->resv = buf->resv; obj->funcs = &virtgpu_gem_dma_buf_funcs; drm_gem_private_object_init(dev, obj, buf->size); diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 12034ec1202990..8c9898b9055d4c 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -194,7 +194,7 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, i++; } - vkms_state->active_planes = kcalloc(i, sizeof(plane), GFP_KERNEL); + vkms_state->active_planes = kcalloc(i, sizeof(*vkms_state->active_planes), GFP_KERNEL); if (!vkms_state->active_planes) return -ENOMEM; vkms_state->num_active_planes = i; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 9b5b8c1f063bb7..f30df3dc871fd1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -51,11 +51,13 @@ static void vmw_bo_release(struct vmw_bo *vbo) mutex_lock(&res->dev_priv->cmdbuf_mutex); (void)vmw_resource_reserve(res, false, true); 
vmw_resource_mob_detach(res); + if (res->dirty) + res->func->dirty_free(res); if (res->coherent) vmw_bo_dirty_release(res->guest_memory_bo); res->guest_memory_bo = NULL; res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, + vmw_resource_unreserve(res, true, false, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -73,9 +75,9 @@ static void vmw_bo_free(struct ttm_buffer_object *bo) { struct vmw_bo *vbo = to_vmw_bo(&bo->base); - WARN_ON(vbo->dirty); WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_release(vbo); + WARN_ON(vbo->dirty); kfree(vbo); } @@ -848,9 +850,9 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) { - xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); + return xa_err(xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL)); } void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 11e330c7c7f52b..51790a11fe6494 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -141,7 +141,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +int vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 2e52d73eba4840..ea741bc4ac3fc7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4086,6 +4086,23 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, return 0; } +/* + * DMA fence callback to remove a seqno_waiter + */ +struct seqno_waiter_rm_context { + struct dma_fence_cb base; + struct vmw_private *dev_priv; +}; + +static void seqno_waiter_rm_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct seqno_waiter_rm_context *ctx = + container_of(cb, struct seqno_waiter_rm_context, base); + + vmw_seqno_waiter_remove(ctx->dev_priv); + kfree(ctx); +} + int vmw_execbuf_process(struct drm_file *file_priv, struct vmw_private *dev_priv, void __user *user_commands, void *kernel_commands, @@ -4266,6 +4283,15 @@ int vmw_execbuf_process(struct drm_file *file_priv, } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); + struct seqno_waiter_rm_context *ctx = + kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx->dev_priv = dev_priv; + vmw_seqno_waiter_add(dev_priv); + if (dma_fence_add_callback(&fence->base, &ctx->base, + seqno_waiter_rm_cb) < 0) { + vmw_seqno_waiter_remove(dev_priv); + kfree(ctx); + } } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a73af8a355fbf5..c4d5fe5f330f98 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -273,7 +273,7 @@ int vmw_user_resource_lookup_handle(struct vmw_private *dev_priv, goto out_bad_resource; res = converter->base_obj_to_res(base); - kref_get(&res->kref); + vmw_resource_reference(res); *p_res = res; ret = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 5721c74da3e0b9..d7a8070330ba54 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -658,7 +658,7 @@ static void vmw_user_surface_free(struct vmw_resource *res) struct vmw_user_surface *user_srf = container_of(srf, struct vmw_user_surface, srf); - 
WARN_ON_ONCE(res->dirty); + WARN_ON(res->dirty); if (user_srf->master) drm_master_put(&user_srf->master); kfree(srf->offsets); @@ -689,8 +689,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) * Dumb buffers own the resource and they'll unref the * resource themselves */ - if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) - return; + WARN_ON(res && res->guest_memory_bo && res->guest_memory_bo->is_dumb); vmw_resource_unreference(&res); } @@ -871,7 +870,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } - vmw_bo_add_detached_resource(res->guest_memory_bo, res); + + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } } tmp = vmw_resource_reference(&srf->res); @@ -1670,6 +1674,14 @@ vmw_gb_surface_define_internal(struct drm_device *dev, } + if (res->guest_memory_bo) { + ret = vmw_bo_add_detached_resource(res->guest_memory_bo, res); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(res); ret = ttm_prime_object_init(tfile, res->guest_memory_size, &user_srf->prime, VMW_RES_SURFACE, @@ -1684,7 +1696,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { - vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2358,12 +2369,19 @@ int vmw_dumb_create(struct drm_file *file_priv, vbo = res->guest_memory_bo; vbo->is_dumb = true; vbo->dumb_surface = vmw_res_to_srf(res); - + drm_gem_object_put(&vbo->tbo.base); + /* + * Unset the user surface dtor since this is not actually exposed + * to userspace. 
The surface is owned via the dumb_buffer's GEM handle + */ + struct vmw_user_surface *usurf = container_of(vbo->dumb_surface, + struct vmw_user_surface, srf); + usurf->prime.base.refcount_release = NULL; err: if (res) vmw_resource_unreference(&res); - if (ret) - ttm_ref_object_base_unref(tfile, arg.rep.handle); + + ttm_ref_object_base_unref(tfile, arg.rep.handle); return ret; } diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 5c2f459a2925a4..9a46aafcb33bae 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -2,6 +2,8 @@ config DRM_XE tristate "Intel Xe Graphics" depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) + depends on INTEL_VSEC || !INTEL_VSEC + depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular # the shmem_readpage() which depends upon tmpfs @@ -27,7 +29,6 @@ config DRM_XE select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI - select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE select IOSF_MBI diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h index a255946b6f77e7..8cfcd3360896c2 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h @@ -41,6 +41,7 @@ #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE BIT(10) /* gen12 */ #define PIPE_CONTROL0_HDC_PIPELINE_FLUSH BIT(9) /* gen12 */ #define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 167fb0f742de7b..5a47991b4b81fc 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED 
REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1) +#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index fb8ec317b6ee6c..891f928d80ce82 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -43,6 +43,10 @@ #define XEHPC_BCS8_RING_BASE 0x3ee000 #define GSCCS_RING_BASE 0x11a000 +#define ENGINE_ID(base) XE_REG((base) + 0x8c) +#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4) +#define ENGINE_CLASS_ID REG_GENMASK(2, 0) + #define RING_TAIL(base) XE_REG((base) + 0x30) #define TAIL_ADDR REG_GENMASK(20, 3) @@ -154,6 +158,7 @@ #define STOP_RING REG_BIT(8) #define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8) +#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac) #define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc) #define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index da1f198ac107cc..181913967ac9b7 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -157,6 +157,7 @@ #define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) #define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) +#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12) #define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) #define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 57944f90bbf6e9..994af591a2e85e 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -11,7 +11,9 @@ #define CTX_RING_TAIL (0x06 + 1) #define 
CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) +#define CTX_BB_PER_CTX_PTR (0x12 + 1) #define CTX_TIMESTAMP (0x22 + 1) +#define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h index 8846eb9ce2a40b..c7d5d782e3f95c 100644 --- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h @@ -21,6 +21,9 @@ #define BMG_PACKAGE_POWER_SKU XE_REG(0x138098) #define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc) #define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120) +#define BMG_FAN_1_SPEED XE_REG(0x138140) +#define BMG_FAN_2_SPEED XE_REG(0x138170) +#define BMG_FAN_3_SPEED XE_REG(0x1381a0) #define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0) #define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434) #define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a8a..0e502feaca8186 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 64f9c936eea063..5922302c3e00cc 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -816,21 +816,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, goto out; } - /* Reject BO eviction if BO is bound to current 
VM. */ - if (evict && ctx->resv) { - struct drm_gpuvm_bo *vm_bo; - - drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) { - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - - if (xe_vm_resv(vm) == ctx->resv && - xe_vm_in_preempt_fence_mode(vm)) { - ret = -EBUSY; - goto out; - } - } - } - /* * Failed multi-hop where the old_mem is still marked as * TTM_PL_FLAG_TEMPORARY, should just be a dummy move. @@ -1023,6 +1008,25 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx, return lret; } +static bool +xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) +{ + struct drm_gpuvm_bo *vm_bo; + + if (!ttm_bo_eviction_valuable(bo, place)) + return false; + + if (!xe_bo_is_xe_bo(bo)) + return true; + + drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) { + if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm))) + return false; + } + + return true; +} + /** * xe_bo_shrink() - Try to shrink an xe bo. * @ctx: The struct ttm_operation_ctx used for shrinking. @@ -1057,7 +1061,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo, (flags.purge && !xe_tt->purgeable)) return -EBUSY; - if (!ttm_bo_eviction_valuable(bo, &place)) + if (!xe_bo_eviction_valuable(bo, &place)) return -EBUSY; if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo)) @@ -1418,7 +1422,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_pfn = xe_ttm_io_mem_pfn, .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, .swap_notify = xe_ttm_bo_swap_notify, }; @@ -2260,6 +2264,8 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) .no_wait_gpu = false, .gfp_retry_mayfail = true, }; + struct pin_cookie cookie; + int ret; if (vm) { lockdep_assert_held(&vm->lock); @@ -2269,8 +2275,12 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) 
ctx.resv = xe_vm_resv(vm); } + cookie = xe_vm_set_validating(vm, allow_res_evict); trace_xe_bo_validate(bo); - return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); + xe_vm_clear_validating(vm, allow_res_evict, cookie); + + return ret; } bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo) @@ -2386,7 +2396,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe, u64 value); static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = { - [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type, + [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type, }; static int gem_create_user_ext_set_property(struct xe_device *xe, diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 72ef0b6fc4250f..0564164935c609 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -314,6 +314,8 @@ struct xe_device { u8 has_atomic_enable_pte_bit:1; /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ u8 has_device_atomics_on_smem:1; + /** @info.has_fan_control: Device supports fan control */ + u8 has_fan_control:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; /** @info.has_heci_cscfi: device has heci cscfi */ @@ -330,6 +332,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */ + u8 has_64bit_timestamp:1; /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; /** @@ -585,6 +589,7 @@ struct xe_device { INTEL_DRAM_DDR5, INTEL_DRAM_LPDDR5, INTEL_DRAM_GDDR, + INTEL_DRAM_GDDR_ECC, } type; u8 num_qgv_points; u8 num_psf_gv_points; diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index f67803e15a0e66..f7a20264ea3305 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ 
b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { - struct dma_buf *dma_buf = attach->dmabuf; - struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv); - - if (!xe_bo_is_vram(bo)) { + if (sg_page(sgt->sgl)) { dma_unmap_sgtable(attach->dev, sgt, dir, 0); sg_free_table(sgt); kfree(sgt); diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c2b..e2bb156c71fb08 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -208,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; @@ -378,7 +383,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(&gt->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +401,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(&gt->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +516,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user)
{ stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +534,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +591,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +727,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f2335664d..d1c76e50379929 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 606922d9dd7302..cd9b1c32f30f80 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -830,7 +830,7 @@ void 
xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); struct xe_lrc *lrc; - u32 old_ts, new_ts; + u64 old_ts, new_ts; int idx; /* diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f857251..0bcf97063ff61a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075c4..b8b8e0810ad94a 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3b6..d0519cd6704a11 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c 
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(&gt->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd43e..765602221dbcf3 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10a9e3c72b3604..66198cf2662c57 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(&gt->uc); + xe_uc_suspend_prepare(&gt->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 2d63a69cbfa38e..f7005a3643e627 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref,
XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index 604bdc7c817360..552ac92496a408 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -32,13 +32,18 @@ * Xe's Freq provides a sysfs API for frequency management: * * device/tile#/gt#/freq0/<item>_freq *read-only* files: + * * - act_freq: The actual resolved frequency decided by PCODE. * - cur_freq: The current one requested by GuC PC to the PCODE. * - rpn_freq: The Render Performance (RP) N level, which is the minimal one. + * - rpa_freq: The Render Performance (RP) A level, which is the achievable one. + * Calculated by PCODE at runtime based on multiple running conditions * - rpe_freq: The Render Performance (RP) E level, which is the efficient one. + * Calculated by PCODE at runtime based on multiple running conditions * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one. * * device/tile#/gt#/freq0/<item>_freq *read-write* files: + * * - min_freq: Min frequency request. * - max_freq: Max frequency request. * If max <= min, then freq_min becomes a fixed frequency request.
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2b3..0c22b3a3665500 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 03072e0949917c..084cbdeba8eaa5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -322,6 +322,13 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +/* + * Ensure that roundup_pow_of_two(length) doesn't overflow. + * Note that roundup_pow_of_two() operates on unsigned long, + * not on u64. 
+ */ +#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX)) + /** * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an * address range @@ -346,6 +353,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_device *xe = gt_to_xe(gt); #define MAX_TLB_INVALIDATION_LEN 7 u32 action[MAX_TLB_INVALIDATION_LEN]; + u64 length = end - start; int len = 0; xe_gt_assert(gt, fence); @@ -358,11 +366,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { + if (!xe->info.has_range_tlb_invalidation || + length > MAX_RANGE_TLB_INVALIDATION_LENGTH) { action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); } else { u64 orig_start = start; - u64 length = end - start; u64 align; if (length < SZ_4K) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index e7c9e095a19f05..7031542a70cebc 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -490,24 +490,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt, engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER)); } -static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads) +/* + * Write the offsets corresponding to the golden LRCs. 
The actual data is + * populated later by guc_golden_lrc_populate() + */ +static void guc_golden_lrc_init(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); + struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); - u8 guc_class; + size_t alloc_size, real_size; + u32 addr_ggtt, offset; + int class; + + offset = guc_ads_golden_lrc_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { + u8 guc_class; + + guc_class = xe_engine_class_to_guc_class(class); - for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) { if (!info_map_read(xe, &info_map, engine_enabled_masks[guc_class])) continue; + real_size = xe_gt_lrc_size(gt, class); + alloc_size = PAGE_ALIGN(real_size); + + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists LRC registers. This is + * required to allow the GuC to restore just the engine state + * when a watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). 
+ */ ads_blob_write(ads, ads.eng_state_size[guc_class], - guc_ads_golden_lrc_size(ads) - - xe_lrc_skip_size(xe)); + real_size - xe_lrc_skip_size(xe)); ads_blob_write(ads, ads.golden_context_lrca[guc_class], - xe_bo_ggtt_addr(ads->bo) + - guc_ads_golden_lrc_offset(ads)); + addr_ggtt); + + addr_ggtt += alloc_size; } } @@ -857,7 +885,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); guc_policies_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); guc_doorbell_init(ads); @@ -883,7 +911,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); - guc_prep_golden_lrc_null(ads); + guc_golden_lrc_init(ads); guc_mapping_table_init(gt, &info_map); guc_capture_prep_lists(ads); guc_doorbell_init(ads); @@ -903,18 +931,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_ads_private_data_offset(ads)); } -static void guc_populate_golden_lrc(struct xe_guc_ads *ads) +/* + * After the golden LRC's are recorded for each engine class by the first + * submission, copy them to the ADS, as initialized earlier by + * guc_golden_lrc_init(). + */ +static void guc_golden_lrc_populate(struct xe_guc_ads *ads) { struct xe_device *xe = ads_to_xe(ads); struct xe_gt *gt = ads_to_gt(ads); struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads), offsetof(struct __guc_ads_blob, system_info)); size_t total_size = 0, alloc_size, real_size; - u32 addr_ggtt, offset; + u32 offset; int class; offset = guc_ads_golden_lrc_offset(ads); - addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) { u8 guc_class; @@ -931,26 +963,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) alloc_size = PAGE_ALIGN(real_size); total_size += alloc_size; - /* - * This interface is slightly confusing. 
We need to pass the - * base address of the full golden context and the size of just - * the engine state, which is the section of the context image - * that starts after the execlists LRC registers. This is - * required to allow the GuC to restore just the engine state - * when a watchdog reset occurs. - * We calculate the engine state size by removing the size of - * what comes before it in the context image (which is identical - * on all engines). - */ - ads_blob_write(ads, ads.eng_state_size[guc_class], - real_size - xe_lrc_skip_size(xe)); - ads_blob_write(ads, ads.golden_context_lrca[guc_class], - addr_ggtt); - xe_map_memcpy_to(xe, ads_to_map(ads), offset, gt->default_lrc[class], real_size); - addr_ggtt += alloc_size; offset += alloc_size; } @@ -959,7 +974,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads) void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads) { - guc_populate_golden_lrc(ads); + guc_golden_lrc_populate(ads); } static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5feb7..9095618648bcbc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index c569ff456e7416..0b102ab46c4df2 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -17,101 +17,130 @@ #include "xe_macros.h" #include "xe_pm.h" -static struct xe_guc *node_to_guc(struct 
drm_info_node *node) -{ - return node->info_ent->data; -} - -static int guc_info(struct seq_file *m, void *data) +/* + * guc_debugfs_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_guc specific in similar way how we handle &xe_gt + * specific files using &xe_gt_debugfs_simple_show. + * + * It is assumed that those debugfs files will be created on directory entry + * which grandparent struct dentry d_inode->i_private points to &xe_gt. + * + * /sys/kernel/debug/dri/0/ + * ├── gt0 # dent->d_parent->d_parent (d_inode->i_private == gt) + * │   ├── uc # dent->d_parent + * │   │   ├── guc_info # dent + * │   │   ├── guc_... + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_guc *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_guc *guc, struct drm_printer *p) + * { + * drm_printf(p, "enabled %d\n", guc->submission_state.enabled); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { name = "foo", .show = guc_debugfs_show, .data = foo }, + * }; + * + * parent = debugfs_create_dir("uc", gtdir); + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), parent, minor); + * + * Return: 0 on success or a negative error code on failure. 
+ */ +static int guc_debugfs_show(struct seq_file *m, void *data) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct dentry *grandparent = parent->d_parent; + struct xe_gt *gt = grandparent->d_inode->i_private; + struct xe_device *xe = gt_to_xe(gt); + int (*print)(struct xe_guc *, struct drm_printer *) = node->info_ent->data; + int ret; xe_pm_runtime_get(xe); - xe_guc_print_info(guc, &p); + ret = print(&gt->uc.guc, &p); xe_pm_runtime_put(xe); - return 0; + return ret; } -static int guc_log(struct seq_file *m, void *data) +static int guc_log(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_log_print(&guc->log, &p); - xe_pm_runtime_put(xe); - + xe_guc_log_print(&guc->log, p); return 0; } -static int guc_log_dmesg(struct seq_file *m, void *data) +static int guc_log_dmesg(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - - xe_pm_runtime_get(xe); xe_guc_log_print_dmesg(&guc->log); - xe_pm_runtime_put(xe); - return 0; } -static int guc_ctb(struct seq_file *m, void *data) +static int guc_ctb(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_ct_print(&guc->ct, &p, true); - xe_pm_runtime_put(xe); - + xe_guc_ct_print(&guc->ct, p, true); return 0; } -static int guc_pc(struct seq_file *m, void *data) +static int guc_pc(struct xe_guc *guc, struct drm_printer *p) { - struct xe_guc *guc = node_to_guc(m->private); - struct xe_device *xe = guc_to_xe(guc); - struct drm_printer p =
drm_seq_file_printer(m); - - xe_pm_runtime_get(xe); - xe_guc_pc_print(&guc->pc, &p); - xe_pm_runtime_put(xe); - + xe_guc_pc_print(&guc->pc, p); return 0; } -static const struct drm_info_list debugfs_list[] = { - {"guc_info", guc_info, 0}, - {"guc_log", guc_log, 0}, - {"guc_log_dmesg", guc_log_dmesg, 0}, - {"guc_ctb", guc_ctb, 0}, - {"guc_pc", guc_pc, 0}, +/* + * only for GuC debugfs files which can be safely used on the VF as well: + * - without access to the GuC privileged registers + * - without access to the PF specific GuC objects + */ +static const struct drm_info_list vf_safe_debugfs_list[] = { + { "guc_info", .show = guc_debugfs_show, .data = xe_guc_print_info }, + { "guc_ctb", .show = guc_debugfs_show, .data = guc_ctb }, +}; + +/* For GuC debugfs files that require the SLPC support */ +static const struct drm_info_list slpc_debugfs_list[] = { + { "guc_pc", .show = guc_debugfs_show, .data = guc_pc }, +}; + +/* everything else should be added here */ +static const struct drm_info_list pf_only_debugfs_list[] = { + { "guc_log", .show = guc_debugfs_show, .data = guc_log }, + { "guc_log_dmesg", .show = guc_debugfs_show, .data = guc_log_dmesg }, }; void xe_guc_debugfs_register(struct xe_guc *guc, struct dentry *parent) { - struct drm_minor *minor = guc_to_xe(guc)->drm.primary; - struct drm_info_list *local; - int i; - -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&guc_to_xe(guc)->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; + struct xe_device *xe = guc_to_xe(guc); + struct drm_minor *minor = xe->drm.primary; - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE + drm_debugfs_create_files(vf_safe_debugfs_list, + ARRAY_SIZE(vf_safe_debugfs_list), + parent, minor); - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = guc; + if (!IS_SRIOV_VF(xe)) { + drm_debugfs_create_files(pf_only_debugfs_list, + ARRAY_SIZE(pf_only_debugfs_list), + parent, minor); - 
drm_debugfs_create_files(local, - ARRAY_SIZE(debugfs_list), - parent, minor); + if (!xe->info.skip_guc_pc) + drm_debugfs_create_files(slpc_debugfs_list, + ARRAY_SIZE(slpc_debugfs_list), + parent, minor); + } } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 85215313976ce4..43b1192ba61cde 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1070,6 +1070,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING, SLPC_RESET_EXTENDED_TIMEOUT_MS)) { xe_gt_err(gt, "GuC PC Start failed: Dynamic GT frequency control and GT sleep states are now disabled.\n"); + ret = -EIO; goto out; } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 31bc2022bfc2d8..769781d577df60 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) return xe_sched_invalidate_job(job, 2); } - ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); + ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0])); ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); /* diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index c3cc0fa105e84a..57b71956ddf42a 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/** - * xe_mark_range_accessed() - mark a range is accessed, so core mm - * have such information for memory eviction or write back to - * hard disk - * @range: the range to mark - * @write: if write to this range, we mark pages in this range - * as dirty - */ -static void xe_mark_range_accessed(struct hmm_range *range, bool write) -{ - struct page *page; - u64 i, npages; - - npages = xe_npages_in_range(range->start, range->end); - for (i = 0; i < npages; i++) { - 
page = hmm_pfn_to_page(range->hmm_pfns[i]); - if (write) - set_page_dirty_lock(page); - - mark_page_accessed(page); - } -} - static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st, struct hmm_range *range, struct rw_semaphore *notifier_sem) { @@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto out_unlock; - xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 8c05fd30b7df61..93241fd0a4ba3b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -389,12 +389,6 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) blit_cctl_val, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - /* Use Fixed slice CCS mode */ - { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), - XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), - XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, - RCU_MODE_FIXED_SLICE_CCS_MODE)) - }, /* Disable WMTP if HW doesn't support it */ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), @@ -461,6 +455,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + /* Use Fixed slice CCS mode */ + { XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"), + XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)), + XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, + RCU_MODE_FIXED_SLICE_CCS_MODE)) + }, }; xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index b53e8d2accdbd7..a440442b4d7270 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ 
b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -32,14 +32,61 @@ bool xe_hw_engine_timeout_in_range(u64 timeout, u64 min, u64 max) return timeout >= min && timeout <= max; } -static void kobj_xe_hw_engine_release(struct kobject *kobj) +static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) { kfree(kobj); } +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type kobj_xe_hw_engine_type = { - .release = kobj_xe_hw_engine_release, - .sysfs_ops = &kobj_sysfs_ops + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, +}; + +static const struct kobj_type kobj_xe_hw_engine_type_def = { + .release = xe_hw_engine_sysfs_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, }; static ssize_t job_timeout_max_store(struct kobject *kobj, @@ -543,7 +590,7 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (!kobj) return -ENOMEM; - kobject_init(kobj, &kobj_xe_hw_engine_type); + kobject_init(kobj, &kobj_xe_hw_engine_type_def); err = kobject_add(kobj, 
parent, "%s", ".defaults"); if (err) goto err_object; @@ -559,57 +606,6 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, return err; } -static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) -{ - kfree(kobj); -} - -static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, - struct attribute *attr, - char *buf) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->show) { - xe_pm_runtime_get(xe); - ret = kattr->show(kobj, kattr, buf); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, - size_t count) -{ - struct xe_device *xe = kobj_to_xe(kobj); - struct kobj_attribute *kattr; - ssize_t ret = -EIO; - - kattr = container_of(attr, struct kobj_attribute, attr); - if (kattr->store) { - xe_pm_runtime_get(xe); - ret = kattr->store(kobj, kattr, buf, count); - xe_pm_runtime_put(xe); - } - - return ret; -} - -static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { - .show = xe_hw_engine_class_sysfs_attr_show, - .store = xe_hw_engine_class_sysfs_attr_store, -}; - -static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { - .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &xe_hw_engine_class_sysfs_ops, -}; static void hw_engine_class_sysfs_fini(void *arg) { @@ -640,7 +636,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) if (!kobj) return -ENOMEM; - kobject_init(kobj, &xe_hw_engine_sysfs_kobj_type); + kobject_init(kobj, &kobj_xe_hw_engine_type); err = kobject_add(kobj, gt->sysfs, "engines"); if (err) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 48d80ffdf7bb9c..eb293aec36a0fb 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -27,6 +28,7 @@ enum 
xe_hwmon_reg { REG_PKG_POWER_SKU_UNIT, REG_GT_PERF_STATUS, REG_PKG_ENERGY_STATUS, + REG_FAN_SPEED, }; enum xe_hwmon_reg_operation { @@ -42,6 +44,13 @@ enum xe_hwmon_channel { CHANNEL_MAX, }; +enum xe_fan_channel { + FAN_1, + FAN_2, + FAN_3, + FAN_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. */ @@ -61,6 +70,16 @@ struct xe_hwmon_energy_info { long accum_energy; }; +/** + * struct xe_hwmon_fan_info - to cache previous fan reading + */ +struct xe_hwmon_fan_info { + /** @reg_val_prev: previous fan reg val */ + u32 reg_val_prev; + /** @time_prev: previous timestamp */ + u64 time_prev; +}; + /** * struct xe_hwmon - xe hwmon data structure */ @@ -79,6 +98,8 @@ struct xe_hwmon { int scl_shift_time; /** @ei: Energy info for energyN_input */ struct xe_hwmon_energy_info ei[CHANNEL_MAX]; + /** @fi: Fan info for fanN_input */ + struct xe_hwmon_fan_info fi[FAN_MAX]; }; static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, @@ -144,6 +165,14 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg return PCU_CR_PACKAGE_ENERGY_STATUS; } break; + case REG_FAN_SPEED: + if (channel == FAN_1) + return BMG_FAN_1_SPEED; + else if (channel == FAN_2) + return BMG_FAN_2_SPEED; + else if (channel == FAN_3) + return BMG_FAN_3_SPEED; + break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; @@ -454,6 +483,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), + HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT), NULL }; @@ -480,6 +510,19 @@ static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) (uval & POWER_SETUP_I1_DATA_MASK)); } +static int 
xe_hwmon_pcode_read_fan_control(const struct xe_hwmon *hwmon, u32 subcmd, u32 *uval) +{ + struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); + + /* Platforms that don't return correct value */ + if (hwmon->xe->info.platform == XE_DG2 && subcmd == FSC_READ_NUM_FANS) { + *uval = 2; + return 0; + } + + return xe_pcode_read(root_tile, PCODE_MBOX(FAN_SPEED_CONTROL, subcmd, 0), uval, NULL); +} + static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, long *value, u32 scale_factor) { @@ -705,6 +748,75 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) } } +static umode_t +xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) +{ + u32 uval; + + if (!hwmon->xe->info.has_fan_control) + return 0; + + switch (attr) { + case hwmon_fan_input: + if (xe_hwmon_pcode_read_fan_control(hwmon, FSC_READ_NUM_FANS, &uval)) + return 0; + + return channel < uval ? 0444 : 0; + default: + return 0; + } +} + +static int +xe_hwmon_fan_input_read(struct xe_hwmon *hwmon, int channel, long *val) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); + struct xe_hwmon_fan_info *fi = &hwmon->fi[channel]; + u64 rotations, time_now, time; + u32 reg_val; + int ret = 0; + + mutex_lock(&hwmon->hwmon_lock); + + reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_FAN_SPEED, channel)); + time_now = get_jiffies_64(); + + /* + * HW register value is accumulated count of pulses from PWM fan with the scale + * of 2 pulses per rotation. + */ + rotations = (reg_val - fi->reg_val_prev) / 2; + + time = jiffies_delta_to_msecs(time_now - fi->time_prev); + if (unlikely(!time)) { + ret = -EAGAIN; + goto unlock; + } + + /* + * Calculate fan speed in RPM by time averaging two subsequent readings in minutes. 
+ * RPM = number of rotations * msecs per minute / time in msecs + */ + *val = DIV_ROUND_UP_ULL(rotations * (MSEC_PER_SEC * 60), time); + + fi->reg_val_prev = reg_val; + fi->time_prev = time_now; +unlock: + mutex_unlock(&hwmon->hwmon_lock); + return ret; +} + +static int +xe_hwmon_fan_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) +{ + switch (attr) { + case hwmon_fan_input: + return xe_hwmon_fan_input_read(hwmon, channel, val); + default: + return -EOPNOTSUPP; + } +} + static umode_t xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel) @@ -730,6 +842,9 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, case hwmon_energy: ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; + case hwmon_fan: + ret = xe_hwmon_fan_is_visible(hwmon, attr, channel); + break; default: ret = 0; break; @@ -765,6 +880,9 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, case hwmon_energy: ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; + case hwmon_fan: + ret = xe_hwmon_fan_read(hwmon, attr, channel, val); + break; default: ret = -EOPNOTSUPP; break; @@ -842,7 +960,7 @@ static void xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); - long energy; + long energy, fan_speed; u64 val_sku_unit = 0; int channel; struct xe_reg pkg_power_sku_unit; @@ -866,6 +984,11 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) for (channel = 0; channel < CHANNEL_MAX; channel++) if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) xe_hwmon_energy_get(hwmon, channel, &energy); + + /* Initialize 'struct xe_hwmon_fan_info' with initial fan register reading. 
*/ + for (channel = 0; channel < FAN_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_fan, hwmon_fan_input, channel)) + xe_hwmon_fan_input_read(hwmon, channel, &fan_speed); } static void xe_hwmon_mutex_destroy(void *arg) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index df3ceddede070a..16e20b5ad325f9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -24,6 +24,7 @@ #include "xe_hw_fence.h" #include "xe_map.h" #include "xe_memirq.h" +#include "xe_mmio.h" #include "xe_sriov.h" #include "xe_trace_lrc.h" #include "xe_vm.h" @@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) #define LRC_PARALLEL_PPHWSP_OFFSET 2048 +#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096 #define LRC_PPHWSP_SIZE SZ_4K u32 xe_lrc_regs_offset(struct xe_lrc *lrc) @@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; } @@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; } +static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc) +{ + return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET; +} + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) { return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); } +static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) +{ + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32); +} + static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { /* Indirect ring state page is at the very end of LRC */ @@ 
-726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs) DECL_MAP_ADDR_HELPERS(start_seqno) DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) DECL_MAP_ADDR_HELPERS(ctx_timestamp) +DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw) DECL_MAP_ADDR_HELPERS(parallel) DECL_MAP_ADDR_HELPERS(indirect_ring) +DECL_MAP_ADDR_HELPERS(engine_id) #undef DECL_MAP_ADDR_HELPERS @@ -742,19 +756,38 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); } +/** + * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address + * @lrc: Pointer to the lrc. + * + * Returns: ctx timestamp udw GGTT address + */ +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc) +{ + return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); +} + /** * xe_lrc_ctx_timestamp() - Read ctx timestamp value * @lrc: Pointer to the lrc. * * Returns: ctx timestamp value */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) { struct xe_device *xe = lrc_to_xe(lrc); struct iosys_map map; + u32 ldw, udw = 0; map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); + ldw = xe_map_read32(xe, &map); + + if (xe->info.has_64bit_timestamp) { + map = __xe_lrc_ctx_timestamp_udw_map(lrc); + udw = xe_map_read32(xe, &map); + } + + return (u64)udw << 32 | ldw; } /** @@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) { - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); + u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); @@ -877,6 +910,79 @@ static void xe_lrc_finish(struct xe_lrc *lrc) xe_bo_unpin(lrc->bo); xe_bo_unlock(lrc->bo); xe_bo_put(lrc->bo); + xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo); +} + +/* + * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active + * context run ticks. 
+ * @lrc: Pointer to the lrc. + * + * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the + * context, but only gets updated when the context switches out. In order to + * check how long a context has been active before it switches out, two things + * are required: + * + * (1) Determine if the context is running: + * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in + * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is + * initialized. During a query, we just check for this value to determine if the + * context is active. If the context switched out, it would overwrite this + * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as + * the last part of context restore, so reusing this LRC location will not + * clobber anything. + * + * (2) Calculate the time that the context has been active for: + * The CTX_TIMESTAMP ticks only when the context is active. If a context is + * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization. + * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific + * engine instance. Since we do not know which instance the context is running + * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and + * store it in the PPHWSP.
+ */ +#define CONTEXT_ACTIVE 1ULL +static int xe_lrc_setup_utilization(struct xe_lrc *lrc) +{ + u32 *cmd, *buf = NULL; + + if (lrc->bb_per_ctx_bo->vmap.is_iomem) { + buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cmd = buf; + } else { + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + } + + *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + *cmd++ = ENGINE_ID(0).addr; + *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc); + *cmd++ = 0; + + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = lower_32_bits(CONTEXT_ACTIVE); + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) { + *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); + *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc); + *cmd++ = 0; + *cmd++ = upper_32_bits(CONTEXT_ACTIVE); + } + + *cmd++ = MI_BATCH_BUFFER_END; + + if (buf) { + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, + buf, (cmd - buf) * sizeof(*cmd)); + kfree(buf); + } + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + + return 0; } #define PVC_CTX_ASID (0x2e + 1) @@ -893,31 +999,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, void *init_data = NULL; u32 arb_enable; u32 lrc_size; + u32 bo_flags; int err; kref_init(&lrc->refcount); + lrc->gt = gt; lrc->flags = 0; lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; + bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE; + /* * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); + bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); + lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + bo_flags); + if (IS_ERR(lrc->bb_per_ctx_bo)) { + err = PTR_ERR(lrc->bb_per_ctx_bo); + goto err_lrc_finish; + } + lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); lrc->ring.size = ring_size; lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -990,7 +1105,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) | _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE)); + lrc->ctx_timestamp = 0; xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); @@ -1019,6 +1137,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); + err = xe_lrc_setup_utilization(lrc); + if (err) + goto err_lrc_finish; + return 0; err_lrc_finish: @@ -1238,6 +1360,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) return __xe_lrc_parallel_map(lrc); } +/** + * xe_lrc_engine_id() - Read engine id value + * @lrc: Pointer to the lrc. 
+ * + * Returns: context id value + */ +static u32 xe_lrc_engine_id(struct xe_lrc *lrc) +{ + struct xe_device *xe = lrc_to_xe(lrc); + struct iosys_map map; + + map = __xe_lrc_engine_id_map(lrc); + return xe_map_read32(xe, &map); +} + static int instr_dw(u32 cmd_header) { /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ @@ -1684,7 +1821,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); return snapshot; } @@ -1784,22 +1921,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) kfree(snapshot); } +static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts) +{ + u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id); + u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id); + struct xe_hw_engine *hwe; + u64 val; + + hwe = xe_gt_hw_engine(lrc->gt, class, instance, false); + if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe), + "Unexpected engine class:instance %d:%d for context utilization\n", + class, instance)) + return -1; + + if (lrc_to_xe(lrc)->info.has_64bit_timestamp) + val = xe_mmio_read64_2x32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + else + val = xe_mmio_read32(&hwe->gt->mmio, + RING_CTX_TIMESTAMP(hwe->mmio_base)); + + *reg_ctx_ts = val; + + return 0; +} + /** * xe_lrc_update_timestamp() - Update ctx timestamp * @lrc: Pointer to the lrc. * @old_ts: Old timestamp value * * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. + * update saved value. 
With support for active contexts, the calculation may be + * slightly racy, so follow a read-again logic to ensure that the context is + * still active before returning the right timestamp. * * Returns: New ctx timestamp value */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts) { + u64 lrc_ts, reg_ts; + u32 engine_id; + *old_ts = lrc->ctx_timestamp; - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + lrc_ts = xe_lrc_ctx_timestamp(lrc); + /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */ + if (IS_SRIOV_VF(lrc_to_xe(lrc))) { + lrc->ctx_timestamp = lrc_ts; + goto done; + } + + if (lrc_ts == CONTEXT_ACTIVE) { + engine_id = xe_lrc_engine_id(lrc); + if (!get_ctx_timestamp(lrc, engine_id, &reg_ts)) + lrc->ctx_timestamp = reg_ts; + + /* read lrc again to ensure context is still active */ + lrc_ts = xe_lrc_ctx_timestamp(lrc); + } + + /* + * If context switched out, just use the lrc_ts. Note that this needs to + * be a separate if condition.
+ */ + if (lrc_ts != CONTEXT_ACTIVE) + lrc->ctx_timestamp = lrc_ts; +done: trace_xe_lrc_update_timestamp(lrc, *old_ts); return lrc->ctx_timestamp; diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 0b40f349ab95d0..eb6e8de8c939e9 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); +u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc); +u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); @@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); * * Returns the current LRC timestamp */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts); +u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts); #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 71ecb453f811a4..ae24cf6f8dd998 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -25,8 +25,8 @@ struct xe_lrc { /** @size: size of lrc including any indirect ring state page */ u32 size; - /** @tile: tile which this LRC belongs to */ - struct xe_tile *tile; + /** @gt: gt which this LRC belongs to */ + struct xe_gt *gt; /** @flags: LRC flags */ #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 @@ -52,7 +52,10 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */ - u32 ctx_timestamp; + u64 ctx_timestamp; + + /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */ + struct xe_bo *bb_per_ctx_bo; }; struct xe_lrc_snapshot; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 
df4282c71bf0bf..5a3e89022c3812 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1177,7 +1177,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, err_sync: /* Sync partial copies if any. FIXME: job_mutex? */ if (fence) { - dma_fence_wait(m->fence, false); + dma_fence_wait(fence, false); dma_fence_put(fence); } @@ -1547,7 +1547,7 @@ void xe_migrate_wait(struct xe_migrate *m) static u32 pte_update_cmd_size(u64 size) { u32 num_dword; - u64 entries = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); /* @@ -1558,7 +1558,7 @@ static u32 pte_update_cmd_size(u64 size) * 2 dword for the page table's physical location * 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); num_dword += entries * 2; return num_dword; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 70a36e77754666..46301f341773f1 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -75,12 +75,12 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) * is fine as it's going to the root tile's mmio, that's * guaranteed to be initialized earlier in xe_mmio_probe_early() */ - mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); + mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; if (tile_count < xe->info.tile_count) { drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", - xe->info.tile_count, tile_count); + xe->info.tile_count, tile_count); xe->info.tile_count = tile_count; /* @@ -128,7 +128,7 @@ int xe_mmio_probe_early(struct xe_device *xe) */ xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); - if (xe->mmio.regs == NULL) { + if (!xe->mmio.regs) { drm_err(&xe->drm, 
"failed to map registers\n"); return -EIO; } @@ -309,8 +309,8 @@ u64 xe_mmio_read64_2x32(struct xe_mmio *mmio, struct xe_reg reg) return (u64)udw << 32 | ldw; } -static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, - u32 *out_val, bool atomic, bool expect_match) +static int __xe_mmio_wait32(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 val, + u32 timeout_us, u32 *out_val, bool atomic, bool expect_match) { ktime_t cur = ktime_get_raw(); const ktime_t end = ktime_add_us(cur, timeout_us); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 31dade91a08974..0c737413fcb68d 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -775,22 +775,23 @@ void xe_mocs_init(struct xe_gt *gt) void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) { struct xe_device *xe = gt_to_xe(gt); + enum xe_force_wake_domains domain; struct xe_mocs_info table; unsigned int fw_ref, flags; flags = get_mocs_settings(xe, &table); + domain = flags & HAS_LNCF_MOCS ? XE_FORCEWAKE_ALL : XE_FW_GT; xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), - flags & HAS_LNCF_MOCS ? 
- XE_FORCEWAKE_ALL : XE_FW_GT); - if (!fw_ref) + fw_ref = xe_force_wake_get(gt_to_fw(gt), domain); + + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) goto err_fw; table.ops->dump(&table, flags, gt, p); - xe_force_wake_put(gt_to_fw(gt), fw_ref); err_fw: + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 9f4632e39a1ad5..e861c694f3361b 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = { module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); -module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444); -MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault"); - module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 84339e509c80d6..5a3bfea8b7b4c4 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -12,7 +12,6 @@ struct xe_modparam { bool force_execlist; bool probe_display; - bool always_migrate_to_vram; u32 force_vram_bar_size; int guc_log_level; char *guc_firmware_path; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 818f023166d5d3..57c607337848a3 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -62,6 +62,7 @@ struct xe_device_desc { u8 is_dgfx:1; u8 has_display:1; + u8 has_fan_control:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; @@ -140,6 +141,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ + .has_64bit_timestamp = 1, \ .va_bits = 48, \ 
.vm_max_level = 4, \ .hw_engine_mask = \ @@ -302,6 +304,7 @@ static const struct xe_device_desc dg2_desc = { DG2_FEATURES, .has_display = true, + .has_fan_control = true, }; static const __maybe_unused struct xe_device_desc pvc_desc = { @@ -336,6 +339,7 @@ static const struct xe_device_desc bmg_desc = { PLATFORM(BATTLEMAGE), .dma_mask_size = 46, .has_display = true, + .has_fan_control = true, .has_heci_cscfi = 1, }; @@ -575,6 +579,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.dma_mask_size = desc->dma_mask_size; xe->info.is_dgfx = desc->is_dgfx; + xe->info.has_fan_control = desc->has_fan_control; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; @@ -668,6 +673,7 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; + xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp; for_each_remote_tile(tile, xe, id) { int err; @@ -920,6 +926,7 @@ static int xe_pci_suspend(struct device *dev) pci_save_state(pdev); pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3cold); return 0; } diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index e9b9bbc138d377..ca6b10d3557349 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -21,6 +21,7 @@ struct xe_graphics_desc { u8 has_indirect_ring_state:1; u8 has_range_tlb_invalidation:1; u8 has_usm:1; + u8 has_64bit_timestamp:1; }; struct xe_media_desc { diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 2bae9afdbd352a..e622ae17f08dd9 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -49,6 +49,9 @@ /* Domain IDs (param2) */ #define PCODE_MBOX_DOMAIN_HBM 0x2 +#define FAN_SPEED_CONTROL 0x7D +#define FSC_READ_NUM_FANS 0x4 + #define PCODE_SCRATCH(x) XE_REG(0x138320 
+ ((x) * 4)) /* PCODE_SCRATCH0 */ #define AUXINFO_REG_OFFSET REG_GENMASK(17, 15) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ffaf0d02dc7de9..856038553b812a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm, } case DRM_GPUVA_OP_DRIVER: { + /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */ + if (op->subop == XE_VMA_SUBOP_MAP_RANGE) { - op->map_range.range->tile_present |= BIT(tile->id); - op->map_range.range->tile_invalidated &= ~BIT(tile->id); + WRITE_ONCE(op->map_range.range->tile_present, + op->map_range.range->tile_present | + BIT(tile->id)); + WRITE_ONCE(op->map_range.range->tile_invalidated, + op->map_range.range->tile_invalidated & + ~BIT(tile->id)); } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) { - op->unmap_range.range->tile_present &= ~BIT(tile->id); + WRITE_ONCE(op->unmap_range.range->tile_present, + op->unmap_range.range->tile_present & + ~BIT(tile->id)); } break; } diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c index 454ea7dc08ac83..b5bc15f436fa2d 100644 --- a/drivers/gpu/drm/xe/xe_pxp.c +++ b/drivers/gpu/drm/xe/xe_pxp.c @@ -541,10 +541,14 @@ int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q) */ xe_pm_runtime_get(pxp->xe); - if (!pxp_prerequisites_done(pxp)) { - ret = -EBUSY; + /* get_readiness_status() returns 0 for in-progress and 1 for done */ + ret = xe_pxp_get_readiness_status(pxp); + if (ret <= 0) { + if (!ret) + ret = -EBUSY; goto out; } + ret = 0; wait_for_idle: /* diff --git a/drivers/gpu/drm/xe/xe_pxp_debugfs.c b/drivers/gpu/drm/xe/xe_pxp_debugfs.c index ccfbacf08efc16..525a2f6bb0767c 100644 --- a/drivers/gpu/drm/xe/xe_pxp_debugfs.c +++ b/drivers/gpu/drm/xe/xe_pxp_debugfs.c @@ -66,9 +66,18 @@ static int pxp_terminate(struct seq_file *m, void *data) { struct xe_pxp *pxp = node_to_pxp(m->private); struct drm_printer p = drm_seq_file_printer(m); + int ready = 
xe_pxp_get_readiness_status(pxp); - if (!xe_pxp_is_enabled(pxp)) - return -ENODEV; + if (ready < 0) + return ready; /* disabled or error occurred */ + else if (!ready) + return -EBUSY; /* init still in progress */ + + /* no need for a termination if PXP is not active */ + if (pxp->status != XE_PXP_ACTIVE) { + drm_printf(&p, "PXP not active\n"); + return 0; + } /* simulate a termination interrupt */ spin_lock_irq(&pxp->xe->irq.lock); diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 917fc16de866a5..bc1689db4cd716 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -137,7 +137,8 @@ emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, int i) { - u32 flags = PIPE_CONTROL_CS_STALL | + u32 flags0 = 0; + u32 flags1 = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_COMMAND_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -148,11 +149,15 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, PIPE_CONTROL_STORE_DATA_INDEX; if (invalidate_tlb) - flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags1 |= PIPE_CONTROL_TLB_INVALIDATE; - flags &= ~mask_flags; + flags1 &= ~mask_flags; - return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); + if (flags1 & PIPE_CONTROL_VF_CACHE_INVALIDATE) + flags0 |= PIPE_CONTROL0_L3_READ_ONLY_CACHE_INVALIDATE; + + return emit_pipe_control(dw, i, flags0, flags1, + LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, @@ -229,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job) static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = 
RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP; return i; } diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c index 8184390f9c7b9c..86d47aaf035892 100644 --- a/drivers/gpu/drm/xe/xe_shrinker.c +++ b/drivers/gpu/drm/xe/xe_shrinker.c @@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe) if (!shrinker) return ERR_PTR(-ENOMEM); - shrinker->shrink = shrinker_alloc(0, "xe system shrinker"); + shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique); if (!shrinker->shrink) { kfree(shrinker); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 3e829c87d7b45a..975094c1a58279 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -15,8 +15,17 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range) { - /* Not reliable without notifier lock */ - return range->base.flags.has_devmem_pages; + /* + * Advisory only check whether the range is currently backed by VRAM + * memory. 
+ */ + + struct drm_gpusvm_range_flags flags = { + /* Pairs with WRITE_ONCE in drm_gpusvm.c */ + .__flags = READ_ONCE(range->base.flags.__flags), + }; + + return flags.has_devmem_pages; } static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) @@ -79,7 +88,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); @@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm) } static bool xe_svm_range_is_valid(struct xe_svm_range *range, - struct xe_tile *tile) + struct xe_tile *tile, + bool devmem_only) { - return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id); + /* + * Advisory only check whether the range currently has a valid mapping, + * READ_ONCE pairs with WRITE_ONCE in xe_pt.c + */ + return ((READ_ONCE(range->tile_present) & + ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) && + (!devmem_only || xe_svm_range_in_vram(range)); } static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) @@ -696,11 +712,14 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, list_for_each_entry(block, blocks, link) block->private = vr; + xe_bo_get(bo); err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, &bo->devmem_allocation, ctx); - xe_bo_unlock(bo); if (err) - xe_bo_put(bo); /* Creation ref */ + xe_svm_devmem_release(&bo->devmem_allocation); + + xe_bo_unlock(bo); + xe_bo_put(bo); unlock: mmap_read_unlock(mm); @@ -709,6 +728,36 @@ static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, return err; } +static bool supports_4K_migration(struct xe_device *xe) +{ + if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) + return false; + + return true; +} + +static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, + struct xe_vma *vma) +{ + struct xe_vm *vm = range_to_vm(&range->base); + u64 range_size = 
xe_svm_range_size(range); + + if (!range->base.flags.migrate_devmem) + return false; + + if (xe_svm_range_in_vram(range)) { + drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); + return false; + } + + if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); + return false; + } + + return true; +} + /** * xe_svm_handle_pagefault() - SVM handle page fault * @vm: The VM. @@ -732,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), .check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + .devmem_only = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + .timeslice_ms = atomic && IS_DGFX(vm->xe) && + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0, }; struct xe_svm_range *range; struct drm_gpusvm_range *r; struct drm_exec exec; struct dma_fence *fence; + int migrate_try_count = ctx.devmem_only ? 
3 : 1; ktime_t end = 0; int err; @@ -755,24 +809,31 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (IS_ERR(r)) return PTR_ERR(r); + if (ctx.devmem_only && !r->flags.migrate_devmem) + return -EACCES; + range = to_xe_range(r); - if (xe_svm_range_is_valid(range, tile)) + if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) return 0; range_debug(range, "PAGE FAULT"); - /* XXX: Add migration policy, for now migrate range once */ - if (!range->skip_migrate && range->base.flags.migrate_devmem && - xe_svm_range_size(range) >= SZ_64K) { - range->skip_migrate = true; - + if (--migrate_try_count >= 0 && + xe_svm_range_needs_migrate_to_vram(range, vma)) { err = xe_svm_alloc_vram(vm, tile, range, &ctx); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { - drm_dbg(&vm->xe->drm, - "VRAM allocation failed, falling back to " - "retrying fault, asid=%u, errno=%pe\n", - vm->usm.asid, ERR_PTR(err)); - goto retry; + if (migrate_try_count || !ctx.devmem_only) { + drm_dbg(&vm->xe->drm, + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + goto retry; + } else { + drm_err(&vm->xe->drm, + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", + vm->usm.asid, ERR_PTR(err)); + return err; + } } } @@ -780,15 +841,23 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx); /* Corner where CPU mappings have changed */ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { - if (err == -EOPNOTSUPP) { - range_debug(range, "PAGE FAULT - EVICT PAGES"); - drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ + if (migrate_try_count > 0 || !ctx.devmem_only) { + if (err == -EOPNOTSUPP) { + range_debug(range, "PAGE FAULT - EVICT PAGES"); + drm_gpusvm_range_evict(&vm->svm.gpusvm, + &range->base); + } + drm_dbg(&vm->xe->drm, + 
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); + range_debug(range, "PAGE FAULT - RETRY PAGES"); + goto retry; + } else { + drm_err(&vm->xe->drm, + "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); } - drm_dbg(&vm->xe->drm, - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); - range_debug(range, "PAGE FAULT - RETRY PAGES"); - goto retry; } if (err) { range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); @@ -812,6 +881,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, drm_exec_fini(&exec); err = PTR_ERR(fence); if (err == -EAGAIN) { + ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ range_debug(range, "PAGE FAULT - RETRY BIND"); goto retry; } @@ -822,9 +892,6 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, } drm_exec_fini(&exec); - if (xe_modparam.always_migrate_to_vram) - range->skip_migrate = false; - dma_fence_wait(fence, false); dma_fence_put(fence); @@ -944,3 +1011,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. + */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076e7..fe58ac2f4baaca 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -36,11 +36,6 @@ struct xe_svm_range { * range. Protected by GPU SVM notifier lock. */ u8 tile_invalidated; - /** - * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler - * locking. 
- */ - u8 skip_migrate :1; }; #if IS_ENABLED(CONFIG_DRM_GPUSVM) @@ -72,6 +67,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +void xe_svm_flush(struct xe_vm *vm); + #else static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { @@ -124,6 +122,11 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h index 5c669a0b21808e..d525cbee1e3411 100644 --- a/drivers/gpu/drm/xe/xe_trace_lrc.h +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -19,12 +19,12 @@ #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) TRACE_EVENT(xe_lrc_update_timestamp, - TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_PROTO(struct xe_lrc *lrc, uint64_t old), TP_ARGS(lrc, old), TP_STRUCT__entry( __field(struct xe_lrc *, lrc) - __field(u32, old) - __field(u32, new) + __field(u64, old) + __field(u64, new) __string(name, lrc->fence_ctx.name) __string(device_id, __dev_name_lrc(lrc)) ), @@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp, __assign_str(name); __assign_str(device_id); ), - TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s", __entry->lrc, __get_str(name), __entry->old, __entry->new, __get_str(device_id)) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd22820441c..3a8751a8b92dde 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ static void uc_reset_wait(struct 
xe_uc *uc) goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450ee..c23e6f5e251417 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd612f..737172013a8f9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1681,10 +1681,16 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) if (flags & XE_VM_FLAG_LR_MODE) xe_pm_runtime_get_noresume(xe); + if (flags & XE_VM_FLAG_FAULT_MODE) { + err = xe_svm_init(vm); + if (err) + goto err_no_resv; + } + vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { err = -ENOMEM; - goto err_no_resv; + goto err_svm_fini; } drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, @@ -1757,12 +1763,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) } } - if (flags & XE_VM_FLAG_FAULT_MODE) { - err = xe_svm_init(vm); - if (err) - goto err_close; - } - if (number_tiles > 1) vm->composite_fence_ctx = dma_fence_context_alloc(1); @@ -1776,6 +1776,11 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) xe_vm_close_and_put(vm); return ERR_PTR(err); +err_svm_fini: + if (flags & XE_VM_FLAG_FAULT_MODE) { + vm->size = 0; /* close the vm */ + xe_svm_fini(vm); + } err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) @@ -3312,8 +3317,7 @@ 
int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 0ef811fc2bdeee..494af6bdc646b4 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -301,6 +301,75 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +/** + * xe_vm_set_validating() - Register this task as currently making bos resident + * @allow_res_evict: Allow eviction of buffer objects bound to @vm when + * validating. + * @vm: Pointer to the vm or NULL. + * + * Register this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. + * Call with the vm's resv lock held. + * + * Return: A pin cookie that should be used for xe_vm_clear_validating(). + */ +static inline struct pin_cookie xe_vm_set_validating(struct xe_vm *vm, + bool allow_res_evict) +{ + struct pin_cookie cookie = {}; + + if (vm && !allow_res_evict) { + xe_vm_assert_held(vm); + cookie = lockdep_pin_lock(&xe_vm_resv(vm)->lock.base); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, current); + } + + return cookie; +} + +/** + * xe_vm_clear_validating() - Unregister this task as currently making bos resident + * @vm: Pointer to the vm or NULL + * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same + * value as for xe_vm_set_validating(). + * @cookie: Cookie obtained from xe_vm_set_validating(). + * + * Unregister this task as currently making bos resident for the vm. Intended + * to avoid eviction by the same task of shared bos bound to the vm. 
+ * Call with the vm's resv lock held. + */ +static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict, + struct pin_cookie cookie) +{ + if (vm && !allow_res_evict) { + lockdep_unpin_lock(&xe_vm_resv(vm)->lock.base, cookie); + /* Pairs with READ_ONCE in xe_vm_is_validating() */ + WRITE_ONCE(vm->validating, NULL); + } +} + +/** + * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident + * by the current task. + * @vm: Pointer to the vm. + * + * If this function returns %true, we should be in a vm resv locked region, since + * the current process is the same task that called xe_vm_set_validating(). + * The function asserts that that's indeed the case. + * + * Return: %true if the task is currently making bos resident, %false otherwise. + */ +static inline bool xe_vm_is_validating(struct xe_vm *vm) +{ + /* Pairs with WRITE_ONCE in xe_vm_set_validating() and xe_vm_clear_validating() */ + if (READ_ONCE(vm->validating) == current) { + xe_vm_assert_held(vm); + return true; + } + return false; +} + #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); #else diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 84fa41b9fa20f3..0882674ce1cbab 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -310,6 +310,14 @@ struct xe_vm { * protected by the vm resv. */ u64 tlb_flush_seqno; + /** + * @validating: The task that is currently making bos resident for this vm. + * Protected by the VM's resv for writing. Opportunistic reading can be done + * using READ_ONCE. Note: This is a workaround for the + * TTM eviction_valuable() callback not being passed a struct + * ttm_operation_context(). Future work might want to address this. 
+ */ + struct task_struct *validating; /** @batch_invalidate_tlb: Always invalidate TLB before batch start */ bool batch_invalidate_tlb; /** @xef: XE file handle for tracking this VM's drm client */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 24f644c0a67365..2f833f0d575f24 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) }, + { XE_RTP_NAME("22021007897"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) + }, /* Xe3_LPG */ { XE_RTP_NAME("14021490052"), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0c738af24f7c53..9b9e176992a837 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -32,8 +32,10 @@ GRAPHICS_VERSION(3001) 14022293748 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019794406 GRAPHICS_VERSION(2001) GRAPHICS_VERSION(2004) + GRAPHICS_VERSION_RANGE(3000, 3001) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index dbecca9bdd544f..cfabf5e2a0bb0a 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -22,6 +22,7 @@ config DRM_ZYNQMP_DPSUB_AUDIO bool "ZynqMP DisplayPort Audio Support" depends on DRM_ZYNQMP_DPSUB depends on SND && SND_SOC + depends on SND_SOC=y || DRM_ZYNQMP_DPSUB=m select SND_SOC_GENERIC_DMAENGINE_PCM help Choose this option to enable DisplayPort audio support in the ZynqMP diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 17c9660da45034..ab0e5a72a05992 100644 --- 
a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -93,7 +93,7 @@ impl Chipset { // For now, redirect to fmt::Debug for convenience. impl fmt::Display for Chipset { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index a503252702b7b4..43859fc757470c 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -151,6 +151,7 @@ config HID_APPLEIR config HID_APPLETB_BL tristate "Apple Touch Bar Backlight" depends on BACKLIGHT_CLASS_DEVICE + depends on X86 || COMPILE_TEST help Say Y here if you want support for the backlight of Touch Bars on x86 MacBook Pros. @@ -163,6 +164,7 @@ config HID_APPLETB_KBD depends on USB_HID depends on BACKLIGHT_CLASS_DEVICE depends on INPUT + depends on X86 || COMPILE_TEST select INPUT_SPARSEKMAP select HID_APPLETB_BL help diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c index 25f0ebfcbd5f56..0a9b44ce4904e4 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c @@ -83,6 +83,9 @@ static int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) case ALS_IDX: privdata->dev_en.is_als_present = false; break; + case SRA_IDX: + privdata->dev_en.is_sra_present = false; + break; } if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { @@ -134,9 +137,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_sts[i] = SENSOR_DISABLED; - if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX) - break; - if (cl_data->sensor_idx[i] == SRA_IDX) { info.sensor_idx = cl_data->sensor_idx[i]; writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0)); @@ -145,8 +145,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) (privdata, cl_data->sensor_idx[i], ENABLE_SENSOR); cl_data->sensor_sts[i] = (status == 
0) ? SENSOR_ENABLED : SENSOR_DISABLED; - if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { + cl_data->is_any_sensor_enabled = true; privdata->dev_en.is_sra_present = true; + } continue; } @@ -238,6 +240,8 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata) cleanup: amd_sfh_hid_client_deinit(privdata); for (i = 0; i < cl_data->num_hid_devices; i++) { + if (cl_data->sensor_idx[i] == SRA_IDX) + continue; devm_kfree(dev, cl_data->feature_report[i]); devm_kfree(dev, in_data->input_report[i]); devm_kfree(dev, cl_data->report_descr[i]); diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 2e96ec6a3073da..9a06f9b0e4ef33 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -38,6 +38,9 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type struct hid_bpf_ops *e; int ret; + if (unlikely(hdev->bpf.destroyed)) + return ERR_PTR(-ENODEV); + if (type >= HID_REPORT_TYPES) return ERR_PTR(-EINVAL); @@ -93,6 +96,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + if (rtype >= HID_REPORT_TYPES) return -EINVAL; @@ -130,6 +136,9 @@ int dispatch_hid_bpf_output_report(struct hid_device *hdev, struct hid_bpf_ops *e; int ret, idx; + if (unlikely(hdev->bpf.destroyed)) + return -ENODEV; + idx = srcu_read_lock(&hdev->bpf.srcu); list_for_each_entry_srcu(e, &hdev->bpf.prog_list, list, srcu_read_lock_held(&hdev->bpf.srcu)) { diff --git a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c index 1a0aeea6a081cd..a754710fc90b8d 100644 --- a/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c +++ b/drivers/hid/bpf/progs/XPPen__ACK05.bpf.c @@ -157,6 +157,7 @@ static const __u8 fixed_rdesc_vendor[] = { ReportCount(5) // padding Input(Const) // Byte 4 in report - just exists so we get to be a tablet pad + 
UsagePage_Digitizers Usage_Dig_BarrelSwitch // BTN_STYLUS ReportCount(1) ReportSize(1) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 0fb210e40a4127..9eafff0b6ea4c3 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -192,7 +192,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, goto cleanup; input_device->report_desc_size = le16_to_cpu( - desc->desc[0].wDescriptorLength); + desc->rpt_desc.wDescriptorLength); if (input_device->report_desc_size == 0) { input_device->dev_info_status = -EINVAL; goto cleanup; @@ -210,7 +210,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, memcpy(input_device->report_desc, ((unsigned char *)desc) + desc->bLength, - le16_to_cpu(desc->desc[0].wDescriptorLength)); + le16_to_cpu(desc->rpt_desc.wDescriptorLength)); /* Send the ack */ memset(&ack, 0, sizeof(struct mousevsc_prt_msg)); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 288a2b864cc41d..1062731315a2a5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -41,6 +41,10 @@ #define USB_VENDOR_ID_ACTIONSTAR 0x2101 #define USB_DEVICE_ID_ACTIONSTAR_1011 0x1011 +#define USB_VENDOR_ID_ADATA_XPG 0x125f +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE 0x7505 +#define USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE 0x7506 + #define USB_VENDOR_ID_ADS_TECH 0x06e1 #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X 0xa155 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 646171598e4132..0731473cc9b1ad 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -27,6 +27,8 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_GAMEPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR), HID_QUIRK_BADPAD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE), HID_QUIRK_ALWAYS_POLL }, + { 
HID_USB_DEVICE(USB_VENDOR_ID_ADATA_XPG, USB_VENDOR_ID_ADATA_XPG_WL_GAMING_MOUSE_DONGLE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016), HID_QUIRK_FULLSPEED_INTERVAL }, { HID_USB_DEVICE(USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX), HID_QUIRK_NO_INIT_REPORTS }, diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index dfd9d22ed559c8..949d307c66a806 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1150,11 +1150,9 @@ static void steam_client_ll_close(struct hid_device *hdev) struct steam_device *steam = hdev->driver_data; unsigned long flags; - bool connected; spin_lock_irqsave(&steam->lock, flags); steam->client_opened--; - connected = steam->connected && !steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); schedule_work(&steam->unregister_work); diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 3b81468a1df297..0bf70664c35ee1 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -174,6 +174,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { + kfree(send_buf); hid_err(hdev, "Unexpected non-int endpoint\n"); return; } diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index a367df6ea01f3d..61a4019ddc7430 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -142,11 +142,12 @@ static int uclogic_input_configured(struct hid_device *hdev, suffix = "System Control"; break; } - } - - if (suffix) + } else { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } diff --git a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c 
index fa51155ebe3937..8a8c4a46f92700 100644 --- a/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c +++ b/drivers/hid/intel-thc-hid/intel-quicki2c/pci-quicki2c.c @@ -82,15 +82,10 @@ static int quicki2c_acpi_get_dsd_property(struct acpi_device *adev, acpi_string { acpi_handle handle = acpi_device_handle(adev); struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object obj = { .type = type }; - struct acpi_object_list arg_list = { - .count = 1, - .pointer = &obj, - }; union acpi_object *ret_obj; acpi_status status; - status = acpi_evaluate_object(handle, dsd_method_name, &arg_list, &buffer); + status = acpi_evaluate_object(handle, dsd_method_name, NULL, &buffer); if (ACPI_FAILURE(status)) { acpi_handle_err(handle, "Can't evaluate %s method: %d\n", dsd_method_name, status); diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 7d9297fad90ea7..d4cbecc668ec02 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -984,12 +984,11 @@ static int usbhid_parse(struct hid_device *hid) struct usb_host_interface *interface = intf->cur_altsetting; struct usb_device *dev = interface_to_usbdev (intf); struct hid_descriptor *hdesc; + struct hid_class_descriptor *hcdesc; u32 quirks = 0; unsigned int rsize = 0; char *rdesc; - int ret, n; - int num_descriptors; - size_t offset = offsetof(struct hid_descriptor, desc); + int ret; quirks = hid_lookup_quirk(hid); @@ -1011,20 +1010,19 @@ static int usbhid_parse(struct hid_device *hid) return -ENODEV; } - if (hdesc->bLength < sizeof(struct hid_descriptor)) { - dbg_hid("hid descriptor is too short\n"); + if (!hdesc->bNumDescriptors || + hdesc->bLength != sizeof(*hdesc) + + (hdesc->bNumDescriptors - 1) * sizeof(*hcdesc)) { + dbg_hid("hid descriptor invalid, bLen=%hhu bNum=%hhu\n", + hdesc->bLength, hdesc->bNumDescriptors); return -EINVAL; } hid->version = le16_to_cpu(hdesc->bcdHID); hid->country = hdesc->bCountryCode; - num_descriptors = min_t(int, 
hdesc->bNumDescriptors, - (hdesc->bLength - offset) / sizeof(struct hid_class_descriptor)); - - for (n = 0; n < num_descriptors; n++) - if (hdesc->desc[n].bDescriptorType == HID_DT_REPORT) - rsize = le16_to_cpu(hdesc->desc[n].wDescriptorLength); + if (hdesc->rpt_desc.bDescriptorType == HID_DT_REPORT) + rsize = le16_to_cpu(hdesc->rpt_desc.wDescriptorLength); if (!rsize || rsize > HID_MAX_DESCRIPTOR_SIZE) { dbg_hid("weird size of report descriptor (%u)\n", rsize); @@ -1052,6 +1050,11 @@ static int usbhid_parse(struct hid_device *hid) goto err; } + if (hdesc->bNumDescriptors > 1) + hid_warn(intf, + "%u unsupported optional hid class descriptors\n", + (int)(hdesc->bNumDescriptors - 1)); + hid->quirks |= quirks; return 0; diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 1556d4287fa506..eaf099b2efdb0a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -70,10 +70,16 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, { while (!kfifo_is_empty(fifo)) { int size = kfifo_peek_len(fifo); - u8 *buf = kzalloc(size, GFP_KERNEL); + u8 *buf; unsigned int count; int err; + buf = kzalloc(size, GFP_KERNEL); + if (!buf) { + kfifo_skip(fifo); + continue; + } + count = kfifo_out(fifo, buf, size); if (count != size) { // Hard to say what is the "right" action in this @@ -81,6 +87,7 @@ static void wacom_wac_queue_flush(struct hid_device *hdev, // to flush seems reasonable enough, however. 
hid_warn(hdev, "%s: removed fifo entry with unexpected size\n", __func__); + kfree(buf); continue; } err = hid_report_raw_event(hdev, HID_INPUT_REPORT, buf, size, false); @@ -2361,6 +2368,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) unsigned int connect_mask = HID_CONNECT_HIDRAW; features->pktlen = wacom_compute_pktlen(hdev); + if (!features->pktlen) + return -ENODEV; if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL)) return -ENOMEM; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fb8cd8469328ee..35f26fa1ffe76e 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -1077,68 +1077,10 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, EXPORT_SYMBOL(vmbus_sendpacket); /* - * vmbus_sendpacket_pagebuffer - Send a range of single-page buffer - * packets using a GPADL Direct packet type. This interface allows you - * to control notifying the host. This will be useful for sending - * batched data. Also the sender can control the send flags - * explicitly. 
- */ -int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, void *buffer, u32 bufferlen, - u64 requestid) -{ - int i; - struct vmbus_channel_packet_page_buffer desc; - u32 descsize; - u32 packetlen; - u32 packetlen_aligned; - struct kvec bufferlist[3]; - u64 aligned_data = 0; - - if (pagecount > MAX_PAGE_BUFFER_COUNT) - return -EINVAL; - - /* - * Adjust the size down since vmbus_channel_packet_page_buffer is the - * largest size we support - */ - descsize = sizeof(struct vmbus_channel_packet_page_buffer) - - ((MAX_PAGE_BUFFER_COUNT - pagecount) * - sizeof(struct hv_page_buffer)); - packetlen = descsize + bufferlen; - packetlen_aligned = ALIGN(packetlen, sizeof(u64)); - - /* Setup the descriptor */ - desc.type = VM_PKT_DATA_USING_GPA_DIRECT; - desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; - desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ - desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ - desc.reserved = 0; - desc.rangecount = pagecount; - - for (i = 0; i < pagecount; i++) { - desc.range[i].len = pagebuffers[i].len; - desc.range[i].offset = pagebuffers[i].offset; - desc.range[i].pfn = pagebuffers[i].pfn; - } - - bufferlist[0].iov_base = &desc; - bufferlist[0].iov_len = descsize; - bufferlist[1].iov_base = buffer; - bufferlist[1].iov_len = bufferlen; - bufferlist[2].iov_base = &aligned_data; - bufferlist[2].iov_len = (packetlen_aligned - packetlen); - - return hv_ringbuffer_write(channel, bufferlist, 3, requestid, NULL); -} -EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); - -/* - * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * vmbus_sendpacket_mpb_desc - Send one or more multi-page buffer packets * using a GPADL Direct packet type. - * The buffer includes the vmbus descriptor. + * The desc argument must include space for the VMBus descriptor. 
The + * rangecount field must already be set. */ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *desc, @@ -1160,7 +1102,6 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->length8 = (u16)(packetlen_aligned >> 3); desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; - desc->rangecount = 1; bufferlist[0].iov_base = desc; bufferlist[0].iov_len = desc_size; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index b3b11be1165009..59792e00cecf38 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -307,7 +307,7 @@ void __init hv_get_partition_id(void) local_irq_save(flags); output = *this_cpu_ptr(hyperv_pcpu_input_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, &output); + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output); pt_id = output->partition_id; local_irq_restore(flags); @@ -566,9 +566,11 @@ int hv_common_cpu_die(unsigned int cpu) * originally allocated memory is reused in hv_common_cpu_init(). 
*/ - synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); - kfree(*synic_eventring_tail); - *synic_eventring_tail = NULL; + if (hv_root_partition()) { + synic_eventring_tail = this_cpu_ptr(hv_synic_eventring_tail); + kfree(*synic_eventring_tail); + *synic_eventring_tail = NULL; + } return 0; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 29780f3a747848..0b450e53161e51 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -477,4 +477,10 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev) #endif /* CONFIG_HYPERV_TESTING */ +/* Create and remove sysfs entry for memory mapped ring buffers for a channel */ +int hv_create_ring_sysfs(struct vmbus_channel *channel, + int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + struct vm_area_struct *vma)); +int hv_remove_ring_sysfs(struct vmbus_channel *channel); + #endif /* _HYPERV_VMBUS_H */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8d3cff42bdbb2a..e3d51a3163163c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1802,6 +1802,26 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel, } static VMBUS_CHAN_ATTR_RO(subchannel_id); +static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj, + const struct bin_attribute *attr, + struct vm_area_struct *vma) +{ + struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); + + /* + * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer + * is not NULL. 
+ */ + return channel->mmap_ring_buffer(channel, vma); +} + +static struct bin_attribute chan_attr_ring_buffer = { + .attr = { + .name = "ring", + .mode = 0600, + }, + .mmap = hv_mmap_ring_buffer_wrapper, +}; static struct attribute *vmbus_chan_attrs[] = { &chan_attr_out_mask.attr, &chan_attr_in_mask.attr, @@ -1821,6 +1841,11 @@ static struct attribute *vmbus_chan_attrs[] = { NULL }; +static struct bin_attribute *vmbus_chan_bin_attrs[] = { + &chan_attr_ring_buffer, + NULL +}; + /* * Channel-level attribute_group callback function. Returns the permission for * each attribute, and returns 0 if an attribute is not visible. @@ -1841,9 +1866,34 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, return attr->mode; } +static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj, + const struct bin_attribute *attr, int idx) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + /* Hide ring attribute if channel's ring_sysfs_visible is set to false */ + if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible) + return 0; + + return attr->attr.mode; +} + +static size_t vmbus_chan_bin_size(struct kobject *kobj, + const struct bin_attribute *bin_attr, int a) +{ + const struct vmbus_channel *channel = + container_of(kobj, struct vmbus_channel, kobj); + + return channel->ringbuffer_pagecount << PAGE_SHIFT; +} + static const struct attribute_group vmbus_chan_group = { .attrs = vmbus_chan_attrs, - .is_visible = vmbus_chan_attr_is_visible + .bin_attrs = vmbus_chan_bin_attrs, + .is_visible = vmbus_chan_attr_is_visible, + .is_bin_visible = vmbus_chan_bin_attr_is_visible, + .bin_size = vmbus_chan_bin_size, }; static const struct kobj_type vmbus_chan_ktype = { @@ -1851,6 +1901,63 @@ static const struct kobj_type vmbus_chan_ktype = { .release = vmbus_chan_release, }; +/** + * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel. 
+ * @channel: Pointer to vmbus_channel structure + * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of + * channel's "ring" sysfs node, which is for the ring buffer of that channel. + * Function pointer is of below type: + * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel, + * struct vm_area_struct *vma) + * This has a pointer to the channel and a pointer to vm_area_struct, + * used for mmap, as arguments. + * + * Sysfs node for ring buffer of a channel is created along with other fields, however its + * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case + * is running. + * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of + * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid + * exposing the ring buffer by default, this function is responsible for enabling visibility of + * ring for userspace to use. + * Note: Race conditions can happen with userspace and it is not encouraged to create new + * use-cases for this. This was added to maintain backward compatibility, while solving + * one of the race conditions in uio_hv_generic while creating sysfs. + * + * Returns 0 on success or error code on failure.
+ */ +int hv_remove_ring_sysfs(struct vmbus_channel *channel) +{ + struct kobject *kobj = &channel->kobj; + int ret; + + channel->ring_sysfs_visible = false; + ret = sysfs_update_group(kobj, &vmbus_chan_group); + channel->mmap_ring_buffer = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs); + /* * vmbus_add_channel_kobj - setup a sub-directory under device/channels */ diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 006ced5ab6e6ad..c7c02a1f55d459 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -933,6 +933,10 @@ static int asus_ec_hwmon_read_string(struct device *dev, { struct ec_sensors_data *state = dev_get_drvdata(dev); int sensor_index = find_ec_sensor_index(state, type, channel); + + if (sensor_index < 0) + return sensor_index; + *str = get_sensor_info(state, sensor_index)->label; return 0; diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c index fa170c966bc3be..d4e2e175e07700 100644 --- a/drivers/hwtracing/coresight/coresight-catu.c +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -458,12 +458,17 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, enum cs_mode cs_mode, static int catu_enable(struct coresight_device *csdev, enum cs_mode mode, void *data) { - int rc; + int rc = 0; struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); - CS_UNLOCK(catu_drvdata->base); - rc = catu_enable_hw(catu_drvdata, mode, data); - CS_LOCK(catu_drvdata->base); + guard(raw_spinlock_irqsave)(&catu_drvdata->spinlock); + if (csdev->refcnt == 0) { + CS_UNLOCK(catu_drvdata->base); + rc = catu_enable_hw(catu_drvdata, mode, data); + CS_LOCK(catu_drvdata->base); + } + if (!rc) + csdev->refcnt++; return rc; } @@ -486,12 +491,15 @@ static int catu_disable_hw(struct catu_drvdata *drvdata) static int catu_disable(struct coresight_device *csdev, void *__unused) { - int rc; + int rc = 0; struct catu_drvdata *catu_drvdata = 
csdev_to_catu_drvdata(csdev); - CS_UNLOCK(catu_drvdata->base); - rc = catu_disable_hw(catu_drvdata); - CS_LOCK(catu_drvdata->base); + guard(raw_spinlock_irqsave)(&catu_drvdata->spinlock); + if (--csdev->refcnt == 0) { + CS_UNLOCK(catu_drvdata->base); + rc = catu_disable_hw(catu_drvdata); + CS_LOCK(catu_drvdata->base); + } return rc; } @@ -550,6 +558,7 @@ static int __catu_probe(struct device *dev, struct resource *res) dev->platform_data = pdata; drvdata->base = base; + raw_spin_lock_init(&drvdata->spinlock); catu_desc.access = CSDEV_ACCESS_IOMEM(base); catu_desc.pdata = pdata; catu_desc.dev = dev; @@ -702,7 +711,7 @@ static int __init catu_init(void) { int ret; - ret = coresight_init_driver("catu", &catu_driver, &catu_platform_driver); + ret = coresight_init_driver("catu", &catu_driver, &catu_platform_driver, THIS_MODULE); tmc_etr_set_catu_ops(&etr_catu_buf_ops); return ret; } diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h index 141feac1c14b08..755776cd19c5bb 100644 --- a/drivers/hwtracing/coresight/coresight-catu.h +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -65,6 +65,7 @@ struct catu_drvdata { void __iomem *base; struct coresight_device *csdev; int irq; + raw_spinlock_t spinlock; }; #define CATU_REG32(name, offset) \ diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h index b9ebc9fcfb7f20..90fd937d3bd837 100644 --- a/drivers/hwtracing/coresight/coresight-config.h +++ b/drivers/hwtracing/coresight/coresight-config.h @@ -228,7 +228,7 @@ struct cscfg_feature_csdev { * @feats_csdev:references to the device features to enable. 
*/ struct cscfg_config_csdev { - const struct cscfg_config_desc *config_desc; + struct cscfg_config_desc *config_desc; struct coresight_device *csdev; bool enabled; struct list_head node; diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index fb43ef6a3b1f0d..d3523f0262af82 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -465,7 +465,7 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, /* Enable all helpers adjacent to the path first */ ret = coresight_enable_helpers(csdev, mode, path); if (ret) - goto err; + goto err_disable_path; /* * ETF devices are tricky... They can be a link or a sink, * depending on how they are configured. If an ETF has been @@ -486,8 +486,10 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, * that need disabling. Disabling the path here * would mean we could disrupt an existing session. */ - if (ret) + if (ret) { + coresight_disable_helpers(csdev, path); goto out; + } break; case CORESIGHT_DEV_TYPE_SOURCE: /* sources are enabled from either sysFS or Perf */ @@ -497,16 +499,19 @@ int coresight_enable_path(struct coresight_path *path, enum cs_mode mode, child = list_next_entry(nd, link)->csdev; ret = coresight_enable_link(csdev, parent, child, source); if (ret) - goto err; + goto err_disable_helpers; break; default: - goto err; + ret = -EINVAL; + goto err_disable_helpers; } } out: return ret; -err: +err_disable_helpers: + coresight_disable_helpers(csdev, path); +err_disable_path: coresight_disable_path_from(path, nd); goto out; } @@ -1585,17 +1590,17 @@ module_init(coresight_init); module_exit(coresight_exit); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv) + struct platform_driver *pdev_drv, struct module *owner) { int ret; - ret = amba_driver_register(amba_drv); + ret = __amba_driver_register(amba_drv, owner); 
if (ret) { pr_err("%s: error registering AMBA driver\n", drv); return ret; } - ret = platform_driver_register(pdev_drv); + ret = __platform_driver_register(pdev_drv, owner); if (!ret) return 0; diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 342c3aaf414dd8..a871d997330b09 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -774,7 +774,8 @@ static struct platform_driver debug_platform_driver = { static int __init debug_init(void) { - return coresight_init_driver("debug", &debug_driver, &debug_platform_driver); + return coresight_init_driver("debug", &debug_driver, &debug_platform_driver, + THIS_MODULE); } static void __exit debug_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 2b8f1046384020..88ef381ee6dd9b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -1020,6 +1020,9 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1); raw_spin_unlock(&drvdata->spinlock); + + cscfg_csdev_disable_active_config(csdev); + cpus_read_unlock(); /* @@ -1176,7 +1179,7 @@ static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata) * tracing at the kernel EL and EL0, forcing to use the * virtual time as the timestamp. 
*/ - trfcr = (TRFCR_EL1_TS_VIRTUAL | + trfcr = (FIELD_PREP(TRFCR_EL1_TS_MASK, TRFCR_EL1_TS_VIRTUAL) | TRFCR_EL1_ExTRE | TRFCR_EL1_E0TRE); diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index fdd0956fecb36d..c3ca904de584d7 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -2320,11 +2320,11 @@ static ssize_t ts_source_show(struct device *dev, goto out; } - switch (drvdata->trfcr & TRFCR_EL1_TS_MASK) { + val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr); + switch (val) { case TRFCR_EL1_TS_VIRTUAL: case TRFCR_EL1_TS_GUEST_PHYSICAL: case TRFCR_EL1_TS_PHYSICAL: - val = FIELD_GET(TRFCR_EL1_TS_MASK, drvdata->trfcr); break; default: val = -1; diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 0541712b2bcb69..124fc2e26cfb1a 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -433,7 +433,8 @@ static struct amba_driver dynamic_funnel_driver = { static int __init funnel_init(void) { - return coresight_init_driver("funnel", &dynamic_funnel_driver, &funnel_driver); + return coresight_init_driver("funnel", &dynamic_funnel_driver, &funnel_driver, + THIS_MODULE); } static void __exit funnel_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index ee7ee79f6cf775..572dcd2bac16d9 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -438,7 +438,8 @@ static struct amba_driver dynamic_replicator_driver = { static int __init replicator_init(void) { - return coresight_init_driver("replicator", &dynamic_replicator_driver, &replicator_driver); + return coresight_init_driver("replicator", &dynamic_replicator_driver, &replicator_driver, + THIS_MODULE); } static void __exit 
replicator_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 26f9339f38b938..527347e4d16c5d 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -1058,7 +1058,7 @@ static struct platform_driver stm_platform_driver = { static int __init stm_init(void) { - return coresight_init_driver("stm", &stm_driver, &stm_platform_driver); + return coresight_init_driver("stm", &stm_driver, &stm_platform_driver, THIS_MODULE); } static void __exit stm_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index a70c1454b4106c..83dad24e0116d4 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -395,6 +395,8 @@ static void cscfg_remove_owned_csdev_configs(struct coresight_device *csdev, voi if (list_empty(&csdev->config_csdev_list)) return; + guard(raw_spinlock_irqsave)(&csdev->cscfg_csdev_lock); + list_for_each_entry_safe(config_csdev, tmp, &csdev->config_csdev_list, node) { if (config_csdev->config_desc->load_owner == load_owner) list_del(&config_csdev->node); @@ -867,6 +869,25 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev) } EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats); +static bool cscfg_config_desc_get(struct cscfg_config_desc *config_desc) +{ + if (!atomic_fetch_inc(&config_desc->active_cnt)) { + /* must ensure that config cannot be unloaded in use */ + if (unlikely(cscfg_owner_get(config_desc->load_owner))) { + atomic_dec(&config_desc->active_cnt); + return false; + } + } + + return true; +} + +static void cscfg_config_desc_put(struct cscfg_config_desc *config_desc) +{ + if (!atomic_dec_return(&config_desc->active_cnt)) + cscfg_owner_put(config_desc->load_owner); +} + /* * This activate configuration for either perf or sysfs. Perf can have multiple * active configs, selected per event, sysfs is limited to one. 
@@ -890,22 +911,17 @@ static int _cscfg_activate_config(unsigned long cfg_hash) if (config_desc->available == false) return -EBUSY; - /* must ensure that config cannot be unloaded in use */ - err = cscfg_owner_get(config_desc->load_owner); - if (err) + if (!cscfg_config_desc_get(config_desc)) { + err = -EINVAL; break; + } + /* * increment the global active count - control changes to * active configurations */ atomic_inc(&cscfg_mgr->sys_active_cnt); - /* - * mark the descriptor as active so enable config on a - * device instance will use it - */ - atomic_inc(&config_desc->active_cnt); - err = 0; dev_dbg(cscfg_device(), "Activate config %s.\n", config_desc->name); break; @@ -920,9 +936,8 @@ static void _cscfg_deactivate_config(unsigned long cfg_hash) list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) { if ((unsigned long)config_desc->event_ea->var == cfg_hash) { - atomic_dec(&config_desc->active_cnt); atomic_dec(&cscfg_mgr->sys_active_cnt); - cscfg_owner_put(config_desc->load_owner); + cscfg_config_desc_put(config_desc); dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name); break; } @@ -1047,7 +1062,7 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, unsigned long cfg_hash, int preset) { struct cscfg_config_csdev *config_csdev_active = NULL, *config_csdev_item; - const struct cscfg_config_desc *config_desc; + struct cscfg_config_desc *config_desc; unsigned long flags; int err = 0; @@ -1062,8 +1077,8 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags); list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) { config_desc = config_csdev_item->config_desc; - if ((atomic_read(&config_desc->active_cnt)) && - ((unsigned long)config_desc->event_ea->var == cfg_hash)) { + if (((unsigned long)config_desc->event_ea->var == cfg_hash) && + cscfg_config_desc_get(config_desc)) { config_csdev_active = config_csdev_item; 
csdev->active_cscfg_ctxt = (void *)config_csdev_active; break; @@ -1097,7 +1112,11 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev, err = -EBUSY; raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); } + + if (err) + cscfg_config_desc_put(config_desc); } + return err; } EXPORT_SYMBOL_GPL(cscfg_csdev_enable_active_config); @@ -1136,8 +1155,10 @@ void cscfg_csdev_disable_active_config(struct coresight_device *csdev) raw_spin_unlock_irqrestore(&csdev->cscfg_csdev_lock, flags); /* true if there was an enabled active config */ - if (config_csdev) + if (config_csdev) { cscfg_csdev_disable_config(config_csdev); + cscfg_config_desc_put(config_csdev->config_desc); + } } EXPORT_SYMBOL_GPL(cscfg_csdev_disable_active_config); diff --git a/drivers/hwtracing/coresight/coresight-tmc-core.c b/drivers/hwtracing/coresight/coresight-tmc-core.c index a7814e8e657b21..455b1c9b15682c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-core.c +++ b/drivers/hwtracing/coresight/coresight-tmc-core.c @@ -1060,7 +1060,7 @@ static struct platform_driver tmc_platform_driver = { static int __init tmc_init(void) { - return coresight_init_driver("tmc", &tmc_driver, &tmc_platform_driver); + return coresight_init_driver("tmc", &tmc_driver, &tmc_platform_driver, THIS_MODULE); } static void __exit tmc_exit(void) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index d858740001c27d..a922e3b709638d 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -747,7 +747,6 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) char *buf = NULL; enum tmc_mode mode; unsigned long flags; - int rc = 0; /* config types are set a boot time and never change */ if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETB && @@ -773,11 +772,11 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) * can't be NULL. 
*/ memset(drvdata->buf, 0, drvdata->size); - rc = __tmc_etb_enable_hw(drvdata); - if (rc) { - raw_spin_unlock_irqrestore(&drvdata->spinlock, flags); - return rc; - } + /* + * Ignore failures to enable the TMC to make sure, we don't + * leave the TMC in a "reading" state. + */ + __tmc_etb_enable_hw(drvdata); } else { /* * The ETB/ETF is not tracing and the buffer was just read. diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 97ef36f03ec207..3e015928842808 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -318,7 +318,7 @@ static struct platform_driver tpiu_platform_driver = { static int __init tpiu_init(void) { - return coresight_init_driver("tpiu", &tpiu_driver, &tpiu_platform_driver); + return coresight_init_driver("tpiu", &tpiu_driver, &tpiu_platform_driver, THIS_MODULE); } static void __exit tpiu_exit(void) diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig index 4b6359326ede99..4f7d2b6d79e294 100644 --- a/drivers/hwtracing/intel_th/Kconfig +++ b/drivers/hwtracing/intel_th/Kconfig @@ -60,6 +60,7 @@ config INTEL_TH_STH config INTEL_TH_MSU tristate "Intel(R) Trace Hub Memory Storage Unit" + depends on MMU help Memory Storage Unit (MSU) trace output device enables storing STP traces to system memory. 
It supports single diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index bf99d79a419204..7163950eb3719c 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -976,7 +977,6 @@ static void msc_buffer_contig_free(struct msc *msc) for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { struct page *page = virt_to_page(msc->base + off); - page->mapping = NULL; __free_page(page); } @@ -1158,9 +1158,6 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i; for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = msc_sg_page(sg); - - page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, sg_virt(sg), sg_dma_address(sg)); } @@ -1601,22 +1598,10 @@ static void msc_mmap_close(struct vm_area_struct *vma) { struct msc_iter *iter = vma->vm_file->private_data; struct msc *msc = iter->msc; - unsigned long pg; if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) return; - /* drop page _refcounts */ - for (pg = 0; pg < msc->nr_pages; pg++) { - struct page *page = msc_buffer_get_page(msc, pg); - - if (WARN_ON_ONCE(!page)) - continue; - - if (page->mapping) - page->mapping = NULL; - } - /* last mapping -- drop user_count */ atomic_dec(&msc->user_count); mutex_unlock(&msc->buf_mutex); @@ -1626,16 +1611,14 @@ static vm_fault_t msc_mmap_fault(struct vm_fault *vmf) { struct msc_iter *iter = vmf->vma->vm_file->private_data; struct msc *msc = iter->msc; + struct page *page; - vmf->page = msc_buffer_get_page(msc, vmf->pgoff); - if (!vmf->page) + page = msc_buffer_get_page(msc, vmf->pgoff); + if (!page) return VM_FAULT_SIGBUS; - get_page(vmf->page); - vmf->page->mapping = vmf->vma->vm_file->f_mapping; - vmf->page->index = vmf->pgoff; - - return 0; + get_page(page); + return vmf_insert_mixed(vmf->vma, vmf->address, page_to_pfn_t(page)); } static 
const struct vm_operations_struct msc_mmap_ops = { @@ -1676,7 +1659,7 @@ static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&msc->user_count); vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY); + vm_flags_set(vma, VM_DONTEXPAND | VM_DONTCOPY | VM_MIXEDMAP); vma->vm_ops = &msc_mmap_ops; return ret; } diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 43bf90d90eebab..208ce4f9e782cd 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -247,6 +247,9 @@ static int ec_i2c_probe(struct platform_device *pdev) u32 remote_bus; int err; + if (!ec) + return dev_err_probe(dev, -EPROBE_DEFER, "couldn't find parent EC device\n"); + if (!ec->cmd_xfer) { dev_err(dev, "Missing sendrecv\n"); return -EINVAL; diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 8e0267c7cc294e..f21f9877c04047 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -278,9 +278,11 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev, if ((dev->flags & MODEL_MASK) == MODEL_AMD_NAVI_GPU) { dev->slave = i2c_new_ccgx_ucsi(&dev->adapter, dev->irq, &dgpu_node); - if (IS_ERR(dev->slave)) + if (IS_ERR(dev->slave)) { + i2c_del_adapter(&dev->adapter); return dev_err_probe(device, PTR_ERR(dev->slave), "register UCSI failed\n"); + } } pm_runtime_set_autosuspend_delay(device, 1000); diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 0d4b3935e68732..342d47e675869d 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1380,9 +1380,9 @@ static int lpi2c_imx_probe(struct platform_device *pdev) return 0; rpm_disable: - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); + 
pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); return ret; } diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 16afb9ca19bba9..876791d20ed55e 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1454,7 +1454,7 @@ omap_i2c_probe(struct platform_device *pdev) (1000 * omap->speed / 8); } - if (of_property_read_bool(node, "mux-states")) { + if (of_property_present(node, "mux-states")) { struct mux_state *mux_state; mux_state = devm_mux_state_get(&pdev->dev, NULL); diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index 8fe9ddff8e96f6..783fb8df2ebee9 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -8,12 +8,12 @@ * Originally based on i2c-mux.c */ -#include #include #include #include #include #include +#include #include #include diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c index 8601b9a8b8e75c..5127e58eebc7d9 100644 --- a/drivers/iio/accel/adis16201.c +++ b/drivers/iio/accel/adis16201.c @@ -211,9 +211,9 @@ static const struct iio_chan_spec adis16201_channels[] = { BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), ADIS_AUX_ADC_CHAN(ADIS16201_AUX_ADC_REG, ADIS16201_SCAN_AUX_ADC, 0, 12), ADIS_INCLI_CHAN(X, ADIS16201_XINCL_OUT_REG, ADIS16201_SCAN_INCLI_X, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), ADIS_INCLI_CHAN(Y, ADIS16201_YINCL_OUT_REG, ADIS16201_SCAN_INCLI_Y, - BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 14), + BIT(IIO_CHAN_INFO_CALIBBIAS), 0, 12), IIO_CHAN_SOFT_TIMESTAMP(7) }; diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index e8cd21fa77a698..cbac622ef82117 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -231,7 +231,7 @@ struct adxl355_data { u8 transf_buf[3]; struct { u8 buf[14]; - s64 ts; + aligned_s64 ts; } buffer; } __aligned(IIO_DMA_MINALIGN); }; diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 
add4053e7a02e7..0c04b2bb7efbf7 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -601,18 +601,14 @@ static int _adxl367_set_odr(struct adxl367_state *st, enum adxl367_odr odr) if (ret) return ret; + st->odr = odr; + /* Activity timers depend on ODR */ ret = _adxl367_set_act_time_ms(st, st->act_time_ms); if (ret) return ret; - ret = _adxl367_set_inact_time_ms(st, st->inact_time_ms); - if (ret) - return ret; - - st->odr = odr; - - return 0; + return _adxl367_set_inact_time_ms(st, st->inact_time_ms); } static int adxl367_set_odr(struct iio_dev *indio_dev, enum adxl367_odr odr) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 48e4282964a069..bf1d3923a18179 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -1226,8 +1226,11 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq) if (ret) return ret; - if (device_property_read_bool(dev, "wakeup-source")) - device_init_wakeup(dev, true); + if (device_property_read_bool(dev, "wakeup-source")) { + ret = devm_device_init_wakeup(dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to init wakeup\n"); + } return devm_iio_device_register(dev, indio_dev); } diff --git a/drivers/iio/adc/ad4851.c b/drivers/iio/adc/ad4851.c index 98ebc853db7962..f1d2e2896f2a2d 100644 --- a/drivers/iio/adc/ad4851.c +++ b/drivers/iio/adc/ad4851.c @@ -1034,7 +1034,7 @@ static int ad4858_parse_channels(struct iio_dev *indio_dev) struct device *dev = &st->spi->dev; struct iio_chan_spec *ad4851_channels; const struct iio_chan_spec ad4851_chan = AD4858_IIO_CHANNEL; - int ret; + int ret, i = 0; ret = ad4851_parse_channels_common(indio_dev, &ad4851_channels, ad4851_chan); @@ -1042,15 +1042,15 @@ static int ad4858_parse_channels(struct iio_dev *indio_dev) return ret; device_for_each_child_node_scoped(dev, child) { - ad4851_channels->has_ext_scan_type = 1; + ad4851_channels[i].has_ext_scan_type = 1; if 
(fwnode_property_read_bool(child, "bipolar")) { - ad4851_channels->ext_scan_type = ad4851_scan_type_20_b; - ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_b); + ad4851_channels[i].ext_scan_type = ad4851_scan_type_20_b; + ad4851_channels[i].num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_b); } else { - ad4851_channels->ext_scan_type = ad4851_scan_type_20_u; - ad4851_channels->num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_u); + ad4851_channels[i].ext_scan_type = ad4851_scan_type_20_u; + ad4851_channels[i].num_ext_scan_type = ARRAY_SIZE(ad4851_scan_type_20_u); } - ad4851_channels++; + i++; } indio_dev->channels = ad4851_channels; diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 3ea81a98e45534..7d5d84a07cae1d 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -301,9 +301,9 @@ static int ad7124_get_3db_filter_freq(struct ad7124_state *st, switch (st->channels[channel].cfg.filter_type) { case AD7124_SINC3_FILTER: - return DIV_ROUND_CLOSEST(fadc * 230, 1000); + return DIV_ROUND_CLOSEST(fadc * 272, 1000); case AD7124_SINC4_FILTER: - return DIV_ROUND_CLOSEST(fadc * 262, 1000); + return DIV_ROUND_CLOSEST(fadc * 230, 1000); default: return -EINVAL; } diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 18559757f9085c..7fef2727f89e98 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -45,7 +45,7 @@ struct ad7266_state { */ struct { __be16 sample[2]; - s64 timestamp; + aligned_s64 timestamp; } data __aligned(IIO_DMA_MINALIGN); }; diff --git a/drivers/iio/adc/ad7380.c b/drivers/iio/adc/ad7380.c index 4fcb49fdf56639..aef85093eb16cb 100644 --- a/drivers/iio/adc/ad7380.c +++ b/drivers/iio/adc/ad7380.c @@ -1211,6 +1211,9 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) struct ad7380_state *st = iio_priv(indio_dev); int ret; + spi_offload_trigger_disable(st->offload, st->offload_trigger); + spi_unoptimize_message(&st->offload_msg); + if (st->seq) { 
ret = regmap_update_bits(st->regmap, AD7380_REG_ADDR_CONFIG1, @@ -1222,10 +1225,6 @@ static int ad7380_offload_buffer_predisable(struct iio_dev *indio_dev) st->seq = false; } - spi_offload_trigger_disable(st->offload, st->offload_trigger); - - spi_unoptimize_message(&st->offload_msg); - return 0; } @@ -1611,11 +1610,25 @@ static int ad7380_write_event_config(struct iio_dev *indio_dev, return ret; } -static int ad7380_get_alert_th(struct ad7380_state *st, +static int ad7380_get_alert_th(struct iio_dev *indio_dev, + const struct iio_chan_spec *chan, enum iio_event_direction dir, int *val) { - int ret, tmp; + struct ad7380_state *st = iio_priv(indio_dev); + const struct iio_scan_type *scan_type; + int ret, tmp, shift; + + scan_type = iio_get_current_scan_type(indio_dev, chan); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + /* + * The register value is 12-bits and is compared to the most significant + * bits of raw value, therefore a shift is required to convert this to + * the same scale as the raw value. 
+ */ + shift = scan_type->realbits - 12; switch (dir) { case IIO_EV_DIR_RISING: @@ -1625,7 +1638,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_HIGH_TH, tmp) << shift; return IIO_VAL_INT; case IIO_EV_DIR_FALLING: ret = regmap_read(st->regmap, @@ -1634,7 +1647,7 @@ static int ad7380_get_alert_th(struct ad7380_state *st, if (ret) return ret; - *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp); + *val = FIELD_GET(AD7380_ALERT_LOW_TH, tmp) << shift; return IIO_VAL_INT; default: return -EINVAL; @@ -1648,7 +1661,6 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, enum iio_event_info info, int *val, int *val2) { - struct ad7380_state *st = iio_priv(indio_dev); int ret; switch (info) { @@ -1656,7 +1668,7 @@ static int ad7380_read_event_value(struct iio_dev *indio_dev, if (!iio_device_claim_direct(indio_dev)) return -EBUSY; - ret = ad7380_get_alert_th(st, dir, val); + ret = ad7380_get_alert_th(indio_dev, chan, dir, val); iio_device_release_direct(indio_dev); return ret; diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1a314fddd7eb98..703556eb7257ea 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -1236,9 +1236,11 @@ static int ad7616_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7616_write_scale_sw; st->write_os = &ad7616_write_os_sw; - ret = st->bops->sw_mode_config(indio_dev); - if (ret) - return ret; + if (st->bops->sw_mode_config) { + ret = st->bops->sw_mode_config(indio_dev); + if (ret) + return ret; + } /* Activate Burst mode and SEQEN MODE */ return ad7606_write_mask(st, AD7616_CONFIGURATION_REGISTER, @@ -1268,6 +1270,9 @@ static int ad7606b_sw_mode_setup(struct iio_dev *indio_dev) st->write_scale = ad7606_write_scale_sw; st->write_os = &ad7606_write_os_sw; + if (!st->bops->sw_mode_config) + return 0; + return st->bops->sw_mode_config(indio_dev); } diff --git 
a/drivers/iio/adc/ad7606_spi.c b/drivers/iio/adc/ad7606_spi.c index 885bf0b68e7775..179115e909888b 100644 --- a/drivers/iio/adc/ad7606_spi.c +++ b/drivers/iio/adc/ad7606_spi.c @@ -131,7 +131,7 @@ static int ad7606_spi_reg_read(struct ad7606_state *st, unsigned int addr) { .tx_buf = &st->d16[0], .len = 2, - .cs_change = 0, + .cs_change = 1, }, { .rx_buf = &st->d16[1], .len = 2, diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 5a863005aca6d1..5e0be36af0c5c2 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -168,7 +168,7 @@ struct ad7768_state { union { struct { __be32 chan; - s64 timestamp; + aligned_s64 timestamp; } scan; __be32 d32; u8 d8[2]; diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index a1e48a756a7b51..359e26e3f5bcfe 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -466,7 +466,7 @@ static irqreturn_t dln2_adc_trigger_h(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct { __le16 values[DLN2_ADC_MAX_CHANNELS]; - int64_t timestamp_space; + aligned_s64 timestamp_space; } data; struct dln2_adc_get_all_vals dev_data; struct dln2_adc *dln2 = iio_priv(indio_dev); diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c index 6748b44d568db6..60a19c35807ab7 100644 --- a/drivers/iio/adc/mcp3911.c +++ b/drivers/iio/adc/mcp3911.c @@ -6,7 +6,7 @@ * Copyright (C) 2018 Kent Gustavsson */ #include -#include +#include #include #include #include @@ -79,6 +79,8 @@ #define MCP3910_CONFIG1_CLKEXT BIT(6) #define MCP3910_CONFIG1_VREFEXT BIT(7) +#define MCP3910_CHANNEL(ch) (MCP3911_REG_CHANNEL0 + (ch)) + #define MCP3910_REG_OFFCAL_CH0 0x0f #define MCP3910_OFFCAL(ch) (MCP3910_REG_OFFCAL_CH0 + (ch) * 6) @@ -110,6 +112,7 @@ struct mcp3911_chip_info { int (*get_offset)(struct mcp3911 *adc, int channel, int *val); int (*set_offset)(struct mcp3911 *adc, int channel, int val); int (*set_scale)(struct mcp3911 *adc, int channel, u32 val); + int (*get_raw)(struct 
mcp3911 *adc, int channel, int *val); }; struct mcp3911 { @@ -170,6 +173,18 @@ static int mcp3911_update(struct mcp3911 *adc, u8 reg, u32 mask, u32 val, u8 len return mcp3911_write(adc, reg, val, len); } +static int mcp3911_read_s24(struct mcp3911 *const adc, u8 const reg, s32 *const val) +{ + u32 uval; + int const ret = mcp3911_read(adc, reg, &uval, 3); + + if (ret) + return ret; + + *val = sign_extend32(uval, 23); + return ret; +} + static int mcp3910_enable_offset(struct mcp3911 *adc, bool enable) { unsigned int mask = MCP3910_CONFIG0_EN_OFFCAL; @@ -194,6 +209,11 @@ static int mcp3910_set_offset(struct mcp3911 *adc, int channel, int val) return adc->chip->enable_offset(adc, 1); } +static int mcp3910_get_raw(struct mcp3911 *adc, int channel, s32 *val) +{ + return mcp3911_read_s24(adc, MCP3910_CHANNEL(channel), val); +} + static int mcp3911_enable_offset(struct mcp3911 *adc, bool enable) { unsigned int mask = MCP3911_STATUSCOM_EN_OFFCAL; @@ -218,6 +238,11 @@ static int mcp3911_set_offset(struct mcp3911 *adc, int channel, int val) return adc->chip->enable_offset(adc, 1); } +static int mcp3911_get_raw(struct mcp3911 *adc, int channel, s32 *val) +{ + return mcp3911_read_s24(adc, MCP3911_CHANNEL(channel), val); +} + static int mcp3910_get_osr(struct mcp3911 *adc, u32 *val) { int ret; @@ -321,12 +346,9 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev, guard(mutex)(&adc->lock); switch (mask) { case IIO_CHAN_INFO_RAW: - ret = mcp3911_read(adc, - MCP3911_CHANNEL(channel->channel), val, 3); + ret = adc->chip->get_raw(adc, channel->channel, val); if (ret) return ret; - - *val = sign_extend32(*val, 23); return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: ret = adc->chip->get_offset(adc, channel->channel, val); @@ -799,6 +821,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3911] = { .channels = mcp3911_channels, @@ 
-810,6 +833,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3911_get_offset, .set_offset = mcp3911_set_offset, .set_scale = mcp3911_set_scale, + .get_raw = mcp3911_get_raw, }, [MCP3912] = { .channels = mcp3912_channels, @@ -821,6 +845,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3913] = { .channels = mcp3913_channels, @@ -832,6 +857,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3914] = { .channels = mcp3914_channels, @@ -843,6 +869,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3918] = { .channels = mcp3918_channels, @@ -854,6 +881,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, [MCP3919] = { .channels = mcp3919_channels, @@ -865,6 +893,7 @@ static const struct mcp3911_chip_info mcp3911_chip_info[] = { .get_offset = mcp3910_get_offset, .set_offset = mcp3910_set_offset, .set_scale = mcp3910_set_scale, + .get_raw = mcp3910_get_raw, }, }; static const struct of_device_id mcp3911_dt_ids[] = { diff --git a/drivers/iio/adc/pac1934.c b/drivers/iio/adc/pac1934.c index 20802b7f49ea84..09fe88eb3fb045 100644 --- a/drivers/iio/adc/pac1934.c +++ b/drivers/iio/adc/pac1934.c @@ -1081,7 +1081,7 @@ static int pac1934_chip_identify(struct pac1934_chip_info *info) /* * documentation related to the ACPI device definition - * 
https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ApplicationNotes/ApplicationNotes/PAC1934-Integration-Notes-for-Microsoft-Windows-10-and-Windows-11-Driver-Support-DS00002534.pdf + * https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ApplicationNotes/ApplicationNotes/PAC193X-Integration-Notes-for-Microsoft-Windows-10-and-Windows-11-Driver-Support-DS00002534.pdf */ static int pac1934_acpi_parse_channel_config(struct i2c_client *client, struct pac1934_chip_info *info) diff --git a/drivers/iio/adc/qcom-spmi-iadc.c b/drivers/iio/adc/qcom-spmi-iadc.c index 7fb8b2499a1d00..b64a8a407168bb 100644 --- a/drivers/iio/adc/qcom-spmi-iadc.c +++ b/drivers/iio/adc/qcom-spmi-iadc.c @@ -543,7 +543,9 @@ static int iadc_probe(struct platform_device *pdev) else return ret; } else { - device_init_wakeup(iadc->dev, 1); + ret = devm_device_init_wakeup(iadc->dev); + if (ret) + return dev_err_probe(iadc->dev, ret, "Failed to init wakeup\n"); } ret = iadc_update_offset(iadc); diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 9a099df7951891..5e28bd28b81a9a 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -520,15 +520,6 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (info->reset) rockchip_saradc_reset_controller(info->reset); - /* - * Use a default value for the converter clock. - * This may become user-configurable in the future. - */ - ret = clk_set_rate(info->clk, info->data->clk_rate); - if (ret < 0) - return dev_err_probe(&pdev->dev, ret, - "failed to set adc clk rate\n"); - ret = regulator_enable(info->vref); if (ret < 0) return dev_err_probe(&pdev->dev, ret, @@ -555,6 +546,14 @@ static int rockchip_saradc_probe(struct platform_device *pdev) if (IS_ERR(info->clk)) return dev_err_probe(&pdev->dev, PTR_ERR(info->clk), "failed to get adc clock\n"); + /* + * Use a default value for the converter clock. + * This may become user-configurable in the future. 
+ */ + ret = clk_set_rate(info->clk, info->data->clk_rate); + if (ret < 0) + return dev_err_probe(&pdev->dev, ret, + "failed to set adc clk rate\n"); platform_set_drvdata(pdev, indio_dev); diff --git a/drivers/iio/chemical/pms7003.c b/drivers/iio/chemical/pms7003.c index d0bd94912e0a34..e05ce1f12065c6 100644 --- a/drivers/iio/chemical/pms7003.c +++ b/drivers/iio/chemical/pms7003.c @@ -5,7 +5,6 @@ * Copyright (c) Tomasz Duszynski */ -#include #include #include #include @@ -19,6 +18,8 @@ #include #include #include +#include +#include #define PMS7003_DRIVER_NAME "pms7003" @@ -76,7 +77,7 @@ struct pms7003_state { /* Used to construct scan to push to the IIO buffer */ struct { u16 data[3]; /* PM1, PM2P5, PM10 */ - s64 ts; + aligned_s64 ts; } scan; }; diff --git a/drivers/iio/chemical/sps30.c b/drivers/iio/chemical/sps30.c index 6f4f2ba2c09d5e..a7888146188d09 100644 --- a/drivers/iio/chemical/sps30.c +++ b/drivers/iio/chemical/sps30.c @@ -108,7 +108,7 @@ static irqreturn_t sps30_trigger_handler(int irq, void *p) int ret; struct { s32 data[4]; /* PM1, PM2P5, PM4, PM10 */ - s64 ts; + aligned_s64 ts; } scan; mutex_lock(&state->lock); diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index ad1882f608c0a2..2055a03cbeb187 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -66,6 +66,10 @@ static struct { {HID_USAGE_SENSOR_HUMIDITY, 0, 1000, 0}, {HID_USAGE_SENSOR_HINGE, 0, 0, 17453293}, {HID_USAGE_SENSOR_HINGE, HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293}, + + {HID_USAGE_SENSOR_HUMAN_PRESENCE, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_PROXIMITY, 0, 1, 0}, + {HID_USAGE_SENSOR_HUMAN_ATTENTION, 0, 1, 0}, }; static void simple_div(int dividend, int divisor, int *whole, diff --git a/drivers/iio/dac/adi-axi-dac.c b/drivers/iio/dac/adi-axi-dac.c index 892d770aec69c4..05b374e137d35d 100644 --- a/drivers/iio/dac/adi-axi-dac.c +++ 
b/drivers/iio/dac/adi-axi-dac.c @@ -707,6 +707,7 @@ static int axi_dac_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val, { struct axi_dac_state *st = iio_backend_get_priv(back); int ret; + u32 ival; guard(mutex)(&st->lock); @@ -719,6 +720,13 @@ static int axi_dac_bus_reg_read(struct iio_backend *back, u32 reg, u32 *val, if (ret) return ret; + ret = regmap_read_poll_timeout(st->regmap, + AXI_DAC_UI_STATUS_REG, ival, + FIELD_GET(AXI_DAC_UI_STATUS_IF_BUSY, ival) == 0, + 10, 100 * KILO); + if (ret) + return ret; + return regmap_read(st->regmap, AXI_DAC_CUSTOM_RD_REG, val); } diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c index d85b7d3de86604..cc8ce0fe74e7c6 100644 --- a/drivers/iio/filter/admv8818.c +++ b/drivers/iio/filter/admv8818.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,16 @@ #define ADMV8818_HPF_WR0_MSK GENMASK(7, 4) #define ADMV8818_LPF_WR0_MSK GENMASK(3, 0) +#define ADMV8818_BAND_BYPASS 0 +#define ADMV8818_BAND_MIN 1 +#define ADMV8818_BAND_MAX 4 +#define ADMV8818_BAND_CORNER_LOW 0 +#define ADMV8818_BAND_CORNER_HIGH 1 + +#define ADMV8818_STATE_MIN 0 +#define ADMV8818_STATE_MAX 15 +#define ADMV8818_NUM_STATES 16 + enum { ADMV8818_BW_FREQ, ADMV8818_CENTER_FREQ @@ -90,20 +101,24 @@ struct admv8818_state { struct mutex lock; unsigned int filter_mode; u64 cf_hz; + u64 lpf_margin_hz; + u64 hpf_margin_hz; }; -static const unsigned long long freq_range_hpf[4][2] = { +static const unsigned long long freq_range_hpf[5][2] = { + {0ULL, 0ULL}, /* bypass */ {1750000000ULL, 3550000000ULL}, {3400000000ULL, 7250000000ULL}, {6600000000, 12000000000}, {12500000000, 19900000000} }; -static const unsigned long long freq_range_lpf[4][2] = { +static const unsigned long long freq_range_lpf[5][2] = { + {U64_MAX, U64_MAX}, /* bypass */ {2050000000ULL, 3850000000ULL}, {3350000000ULL, 7250000000ULL}, {7000000000, 13000000000}, - {12550000000, 18500000000} + {12550000000, 18850000000} }; static const 
struct regmap_config admv8818_regmap_config = { @@ -121,44 +136,59 @@ static const char * const admv8818_modes[] = { static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq) { - unsigned int hpf_step = 0, hpf_band = 0, i, j; - u64 freq_step; - int ret; + int band, state, ret; + unsigned int hpf_state = ADMV8818_STATE_MIN, hpf_band = ADMV8818_BAND_BYPASS; + u64 freq_error, min_freq_error, freq_corner, freq_step; - if (freq < freq_range_hpf[0][0]) + if (freq < freq_range_hpf[ADMV8818_BAND_MIN][ADMV8818_BAND_CORNER_LOW]) goto hpf_write; - if (freq > freq_range_hpf[3][1]) { - hpf_step = 15; - hpf_band = 4; - + if (freq >= freq_range_hpf[ADMV8818_BAND_MAX][ADMV8818_BAND_CORNER_HIGH]) { + hpf_state = ADMV8818_STATE_MAX; + hpf_band = ADMV8818_BAND_MAX; goto hpf_write; } - for (i = 0; i < 4; i++) { - freq_step = div_u64((freq_range_hpf[i][1] - - freq_range_hpf[i][0]), 15); + /* Close HPF frequency gap between 12 and 12.5 GHz */ + if (freq >= 12000ULL * HZ_PER_MHZ && freq < 12500ULL * HZ_PER_MHZ) { + hpf_state = ADMV8818_STATE_MAX; + hpf_band = 3; + goto hpf_write; + } - if (freq > freq_range_hpf[i][0] && - (freq < freq_range_hpf[i][1] + freq_step)) { - hpf_band = i + 1; + min_freq_error = U64_MAX; + for (band = ADMV8818_BAND_MIN; band <= ADMV8818_BAND_MAX; band++) { + /* + * This (and therefore all other ranges) have a corner + * frequency higher than the target frequency. 
+ */ + if (freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW] > freq) + break; - for (j = 1; j <= 16; j++) { - if (freq < (freq_range_hpf[i][0] + (freq_step * j))) { - hpf_step = j - 1; - break; - } + freq_step = freq_range_hpf[band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW]; + freq_step = div_u64(freq_step, ADMV8818_NUM_STATES - 1); + + for (state = ADMV8818_STATE_MIN; state <= ADMV8818_STATE_MAX; state++) { + freq_corner = freq_range_hpf[band][ADMV8818_BAND_CORNER_LOW] + + freq_step * state; + + /* + * This (and therefore all other states) have a corner + * frequency higher than the target frequency. + */ + if (freq_corner > freq) + break; + + freq_error = freq - freq_corner; + if (freq_error < min_freq_error) { + min_freq_error = freq_error; + hpf_state = state; + hpf_band = band; } - break; } } - /* Close HPF frequency gap between 12 and 12.5 GHz */ - if (freq >= 12000 * HZ_PER_MHZ && freq <= 12500 * HZ_PER_MHZ) { - hpf_band = 3; - hpf_step = 15; - } - hpf_write: ret = regmap_update_bits(st->regmap, ADMV8818_REG_WR0_SW, ADMV8818_SW_IN_SET_WR0_MSK | @@ -170,7 +200,7 @@ static int __admv8818_hpf_select(struct admv8818_state *st, u64 freq) return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER, ADMV8818_HPF_WR0_MSK, - FIELD_PREP(ADMV8818_HPF_WR0_MSK, hpf_step)); + FIELD_PREP(ADMV8818_HPF_WR0_MSK, hpf_state)); } static int admv8818_hpf_select(struct admv8818_state *st, u64 freq) @@ -186,31 +216,52 @@ static int admv8818_hpf_select(struct admv8818_state *st, u64 freq) static int __admv8818_lpf_select(struct admv8818_state *st, u64 freq) { - unsigned int lpf_step = 0, lpf_band = 0, i, j; - u64 freq_step; - int ret; + int band, state, ret; + unsigned int lpf_state = ADMV8818_STATE_MIN, lpf_band = ADMV8818_BAND_BYPASS; + u64 freq_error, min_freq_error, freq_corner, freq_step; - if (freq > freq_range_lpf[3][1]) + if (freq > freq_range_lpf[ADMV8818_BAND_MAX][ADMV8818_BAND_CORNER_HIGH]) goto lpf_write; - if (freq < 
freq_range_lpf[0][0]) { - lpf_band = 1; - + if (freq < freq_range_lpf[ADMV8818_BAND_MIN][ADMV8818_BAND_CORNER_LOW]) { + lpf_state = ADMV8818_STATE_MIN; + lpf_band = ADMV8818_BAND_MIN; goto lpf_write; } - for (i = 0; i < 4; i++) { - if (freq > freq_range_lpf[i][0] && freq < freq_range_lpf[i][1]) { - lpf_band = i + 1; - freq_step = div_u64((freq_range_lpf[i][1] - freq_range_lpf[i][0]), 15); + min_freq_error = U64_MAX; + for (band = ADMV8818_BAND_MAX; band >= ADMV8818_BAND_MIN; --band) { + /* + * At this point the highest corner frequency of + * all remaining ranges is below the target. + * LPF corner should be >= the target. + */ + if (freq > freq_range_lpf[band][ADMV8818_BAND_CORNER_HIGH]) + break; + + freq_step = freq_range_lpf[band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_lpf[band][ADMV8818_BAND_CORNER_LOW]; + freq_step = div_u64(freq_step, ADMV8818_NUM_STATES - 1); + + for (state = ADMV8818_STATE_MAX; state >= ADMV8818_STATE_MIN; --state) { + + freq_corner = freq_range_lpf[band][ADMV8818_BAND_CORNER_LOW] + + state * freq_step; - for (j = 0; j <= 15; j++) { - if (freq < (freq_range_lpf[i][0] + (freq_step * j))) { - lpf_step = j; - break; - } + /* + * At this point all other states in range will + * place the corner frequency below the target + * LPF corner should >= the target. 
+ */ + if (freq > freq_corner) + break; + + freq_error = freq_corner - freq; + if (freq_error < min_freq_error) { + min_freq_error = freq_error; + lpf_state = state; + lpf_band = band; } - break; } } @@ -225,7 +276,7 @@ static int __admv8818_lpf_select(struct admv8818_state *st, u64 freq) return regmap_update_bits(st->regmap, ADMV8818_REG_WR0_FILTER, ADMV8818_LPF_WR0_MSK, - FIELD_PREP(ADMV8818_LPF_WR0_MSK, lpf_step)); + FIELD_PREP(ADMV8818_LPF_WR0_MSK, lpf_state)); } static int admv8818_lpf_select(struct admv8818_state *st, u64 freq) @@ -242,16 +293,28 @@ static int admv8818_lpf_select(struct admv8818_state *st, u64 freq) static int admv8818_rfin_band_select(struct admv8818_state *st) { int ret; + u64 hpf_corner_target, lpf_corner_target; st->cf_hz = clk_get_rate(st->clkin); + /* Check for underflow */ + if (st->cf_hz > st->hpf_margin_hz) + hpf_corner_target = st->cf_hz - st->hpf_margin_hz; + else + hpf_corner_target = 0; + + /* Check for overflow */ + lpf_corner_target = st->cf_hz + st->lpf_margin_hz; + if (lpf_corner_target < st->cf_hz) + lpf_corner_target = U64_MAX; + mutex_lock(&st->lock); - ret = __admv8818_hpf_select(st, st->cf_hz); + ret = __admv8818_hpf_select(st, hpf_corner_target); if (ret) goto exit; - ret = __admv8818_lpf_select(st, st->cf_hz); + ret = __admv8818_lpf_select(st, lpf_corner_target); exit: mutex_unlock(&st->lock); return ret; @@ -278,8 +341,11 @@ static int __admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq) hpf_state = FIELD_GET(ADMV8818_HPF_WR0_MSK, data); - *hpf_freq = div_u64(freq_range_hpf[hpf_band - 1][1] - freq_range_hpf[hpf_band - 1][0], 15); - *hpf_freq = freq_range_hpf[hpf_band - 1][0] + (*hpf_freq * hpf_state); + *hpf_freq = freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_LOW]; + *hpf_freq = div_u64(*hpf_freq, ADMV8818_NUM_STATES - 1); + *hpf_freq = freq_range_hpf[hpf_band][ADMV8818_BAND_CORNER_LOW] + + (*hpf_freq * hpf_state); return ret; } @@ -316,8 +382,11 
@@ static int __admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq) lpf_state = FIELD_GET(ADMV8818_LPF_WR0_MSK, data); - *lpf_freq = div_u64(freq_range_lpf[lpf_band - 1][1] - freq_range_lpf[lpf_band - 1][0], 15); - *lpf_freq = freq_range_lpf[lpf_band - 1][0] + (*lpf_freq * lpf_state); + *lpf_freq = freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_HIGH] - + freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_LOW]; + *lpf_freq = div_u64(*lpf_freq, ADMV8818_NUM_STATES - 1); + *lpf_freq = freq_range_lpf[lpf_band][ADMV8818_BAND_CORNER_LOW] + + (*lpf_freq * lpf_state); return ret; } @@ -333,6 +402,19 @@ static int admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq) return ret; } +static int admv8818_write_raw_get_fmt(struct iio_dev *indio_dev, + struct iio_chan_spec const *chan, + long mask) +{ + switch (mask) { + case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + case IIO_CHAN_INFO_HIGH_PASS_FILTER_3DB_FREQUENCY: + return IIO_VAL_INT_64; + default: + return -EINVAL; + } +} + static int admv8818_write_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int val, int val2, long info) @@ -341,6 +423,9 @@ static int admv8818_write_raw(struct iio_dev *indio_dev, u64 freq = ((u64)val2 << 32 | (u32)val); + if ((s64)freq < 0) + return -EINVAL; + switch (info) { case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: return admv8818_lpf_select(st, freq); @@ -502,6 +587,7 @@ static int admv8818_set_mode(struct iio_dev *indio_dev, static const struct iio_info admv8818_info = { .write_raw = admv8818_write_raw, + .write_raw_get_fmt = admv8818_write_raw_get_fmt, .read_raw = admv8818_read_raw, .debugfs_reg_access = &admv8818_reg_access, }; @@ -641,6 +727,32 @@ static int admv8818_clk_setup(struct admv8818_state *st) return devm_add_action_or_reset(&spi->dev, admv8818_clk_notifier_unreg, st); } +static int admv8818_read_properties(struct admv8818_state *st) +{ + struct spi_device *spi = st->spi; + u32 mhz; + int ret; + + ret = device_property_read_u32(&spi->dev, 
"adi,lpf-margin-mhz", &mhz); + if (ret == 0) + st->lpf_margin_hz = (u64)mhz * HZ_PER_MHZ; + else if (ret == -EINVAL) + st->lpf_margin_hz = 0; + else + return ret; + + + ret = device_property_read_u32(&spi->dev, "adi,hpf-margin-mhz", &mhz); + if (ret == 0) + st->hpf_margin_hz = (u64)mhz * HZ_PER_MHZ; + else if (ret == -EINVAL) + st->hpf_margin_hz = 0; + else if (ret < 0) + return ret; + + return 0; +} + static int admv8818_probe(struct spi_device *spi) { struct iio_dev *indio_dev; @@ -672,6 +784,10 @@ static int admv8818_probe(struct spi_device *spi) mutex_init(&st->lock); + ret = admv8818_read_properties(st); + if (ret) + return ret; + ret = admv8818_init(st); if (ret) return ret; diff --git a/drivers/iio/imu/adis16550.c b/drivers/iio/imu/adis16550.c index b14ea8937c7f5a..28f0dbd0226cbe 100644 --- a/drivers/iio/imu/adis16550.c +++ b/drivers/iio/imu/adis16550.c @@ -836,7 +836,7 @@ static irqreturn_t adis16550_trigger_handler(int irq, void *p) u16 dummy; bool valid; struct iio_poll_func *pf = p; - __be32 data[ADIS16550_MAX_SCAN_DATA]; + __be32 data[ADIS16550_MAX_SCAN_DATA] __aligned(8); struct iio_dev *indio_dev = pf->indio_dev; struct adis16550 *st = iio_priv(indio_dev); struct adis *adis = iio_device_get_drvdata(indio_dev); diff --git a/drivers/iio/imu/bmi270/bmi270_core.c b/drivers/iio/imu/bmi270/bmi270_core.c index a86be5af5ccb1f..2e4469f30d538c 100644 --- a/drivers/iio/imu/bmi270/bmi270_core.c +++ b/drivers/iio/imu/bmi270/bmi270_core.c @@ -918,8 +918,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_ACC_CONF_ODR_MSK, BMI270_ACC_CONF_ODR_100HZ) | FIELD_PREP(BMI270_ACC_CONF_BWP_MSK, - BMI270_ACC_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_ACC_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure accelerometer"); @@ -927,8 +926,7 @@ static int bmi270_configure_imu(struct bmi270_data *data) FIELD_PREP(BMI270_GYR_CONF_ODR_MSK, BMI270_GYR_CONF_ODR_200HZ) | 
FIELD_PREP(BMI270_GYR_CONF_BWP_MSK, - BMI270_GYR_CONF_BWP_NORMAL_MODE) | - BMI270_PWR_CONF_ADV_PWR_SAVE_MSK); + BMI270_GYR_CONF_BWP_NORMAL_MODE)); if (ret) return dev_err_probe(dev, ret, "Failed to configure gyroscope"); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 3d3b27f28c9d1c..273196e647a2b5 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -50,7 +50,7 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p) u16 fifo_count; u32 fifo_period; s64 timestamp; - u8 data[INV_MPU6050_OUTPUT_DATA_SIZE]; + u8 data[INV_MPU6050_OUTPUT_DATA_SIZE] __aligned(8); size_t i, nb; mutex_lock(&st->lock); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 0a7cd8c1aa3313..8a9d2593576a2a 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -392,6 +392,9 @@ int st_lsm6dsx_read_fifo(struct st_lsm6dsx_hw *hw) if (fifo_status & cpu_to_le16(ST_LSM6DSX_FIFO_EMPTY_MASK)) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_SAMPLE_SIZE; + fifo_len = (le16_to_cpu(fifo_status) & fifo_diff_mask) * ST_LSM6DSX_CHAN_SIZE; fifo_len = (fifo_len / pattern_len) * pattern_len; @@ -623,6 +626,9 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw) if (!fifo_len) return 0; + if (!pattern_len) + pattern_len = ST_LSM6DSX_TAGGED_SAMPLE_SIZE; + for (read_len = 0; read_len < fifo_len; read_len += pattern_len) { err = st_lsm6dsx_read_block(hw, ST_LSM6DSX_REG_FIFO_OUT_TAG_ADDR, diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 4fdcc2acc94ed0..96c6106b95eef6 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -2719,8 +2719,11 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id, } if (device_property_read_bool(dev, "wakeup-source") 
|| - (pdata && pdata->wakeup_source)) - device_init_wakeup(dev, true); + (pdata && pdata->wakeup_source)) { + err = devm_device_init_wakeup(dev); + if (err) + return dev_err_probe(dev, err, "Failed to init wakeup\n"); + } return 0; } diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 76b76d12b38822..4c65b32d34ce41 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -34,9 +34,9 @@ struct prox_state { struct iio_chan_spec channels[MAX_CHANNELS]; u32 channel2usage[MAX_CHANNELS]; u32 human_presence[MAX_CHANNELS]; - int scale_pre_decml; - int scale_post_decml; - int scale_precision; + int scale_pre_decml[MAX_CHANNELS]; + int scale_post_decml[MAX_CHANNELS]; + int scale_precision[MAX_CHANNELS]; unsigned long scan_mask[2]; /* One entry plus one terminator. */ int num_channels; }; @@ -116,13 +116,15 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->scale_pre_decml; - *val2 = prox_state->scale_post_decml; - ret_type = prox_state->scale_precision; + if (chan->scan_index >= prox_state->num_channels) + return -EINVAL; + + *val = prox_state->scale_pre_decml[chan->scan_index]; + *val2 = prox_state->scale_post_decml[chan->scan_index]; + ret_type = prox_state->scale_precision[chan->scan_index]; break; case IIO_CHAN_INFO_OFFSET: - *val = hid_sensor_convert_exponent( - prox_state->prox_attr[chan->scan_index].unit_expo); + *val = 0; ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SAMP_FREQ: @@ -249,6 +251,10 @@ static int prox_parse_report(struct platform_device *pdev, st->prox_attr[index].size); dev_dbg(&pdev->dev, "prox %x:%x\n", st->prox_attr[index].index, st->prox_attr[index].report_id); + st->scale_precision[index] = + hid_sensor_format_scale(usage_id, &st->prox_attr[index], + &st->scale_pre_decml[index], + &st->scale_post_decml[index]); index++; } diff --git a/drivers/iio/light/opt3001.c 
b/drivers/iio/light/opt3001.c index 65b295877b4158..393a3d2fbe1d73 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -788,8 +788,9 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) int ret; bool wake_result_ready_queue = false; enum iio_chan_type chan_type = opt->chip_info->chan_type; + bool ok_to_ignore_lock = opt->ok_to_ignore_lock; - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_lock(&opt->lock); ret = i2c_smbus_read_word_swapped(opt->client, OPT3001_CONFIGURATION); @@ -826,7 +827,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio) } out: - if (!opt->ok_to_ignore_lock) + if (!ok_to_ignore_lock) mutex_unlock(&opt->lock); if (wake_result_ready_queue) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 08975c60e325cb..7bc341c6969762 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -535,8 +535,8 @@ static int ak8974_detect(struct ak8974 *ak8974) fab_data2, sizeof(fab_data2)); for (i = 0; i < 3; ++i) { - static const char axis[3] = "XYZ"; - static const char pgaxis[6] = "ZYZXYX"; + static const char axis[] = "XYZ"; + static const char pgaxis[] = "ZYZXYX"; unsigned offz = le16_to_cpu(fab_data2[i]) & 0x7F; unsigned fine = le16_to_cpu(fab_data1[i]); unsigned sens = le16_to_cpu(fab_data1[i + 3]); diff --git a/drivers/iio/pressure/mprls0025pa.h b/drivers/iio/pressure/mprls0025pa.h index 9d5c30afa9d69a..d62a018eaff32b 100644 --- a/drivers/iio/pressure/mprls0025pa.h +++ b/drivers/iio/pressure/mprls0025pa.h @@ -34,16 +34,6 @@ struct iio_dev; struct mpr_data; struct mpr_ops; -/** - * struct mpr_chan - * @pres: pressure value - * @ts: timestamp - */ -struct mpr_chan { - s32 pres; - s64 ts; -}; - enum mpr_func_id { MPR_FUNCTION_A, MPR_FUNCTION_B, @@ -69,6 +59,8 @@ enum mpr_func_id { * reading in a loop until data is ready * @completion: handshake from irq to read * @chan: channel values for buffered mode + * @chan.pres: pressure value + * @chan.ts: 
timestamp * @buffer: raw conversion data */ struct mpr_data { @@ -87,7 +79,10 @@ struct mpr_data { struct gpio_desc *gpiod_reset; int irq; struct completion completion; - struct mpr_chan chan; + struct { + s32 pres; + aligned_s64 ts; + } chan; u8 buffer[MPR_MEASUREMENT_RD_SIZE] __aligned(IIO_DMA_MINALIGN); }; diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index c28a7a6dea5f12..555a61e2f3fdd1 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -121,9 +121,9 @@ static const struct maxim_thermocouple_chip maxim_thermocouple_chips[] = { struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; + char tc_type; u8 buffer[16] __aligned(IIO_DMA_MINALIGN); - char tc_type; }; static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 142170473e7536..e64cbd034a2a19 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -167,7 +167,7 @@ struct cm_port { struct cm_device { struct kref kref; struct list_head list; - spinlock_t mad_agent_lock; + rwlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -285,7 +285,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) if (!cm_id_priv->av.port) return ERR_PTR(-EINVAL); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { m = ERR_PTR(-EINVAL); @@ -311,7 +311,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) m->ah = ah; out: - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return m; } @@ -1297,10 +1297,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) if 
(!cm_id_priv->av.port) return cpu_to_be64(low_tid); - spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); if (cm_id_priv->av.port->mad_agent) hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; - spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -3786,7 +3786,8 @@ static void cm_process_send_error(struct cm_id_private *cm_id_priv, spin_lock_irq(&cm_id_priv->lock); if (msg != cm_id_priv->msg) { spin_unlock_irq(&cm_id_priv->lock); - cm_free_priv_msg(msg); + cm_free_msg(msg); + cm_deref_id(cm_id_priv); return; } cm_free_priv_msg(msg); @@ -4378,7 +4379,7 @@ static int cm_add_one(struct ib_device *ib_device) return -ENOMEM; kref_init(&cm_dev->kref); - spin_lock_init(&cm_dev->mad_agent_lock); + rwlock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; @@ -4494,9 +4495,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) * The above ensures no call paths from the work are running, * the remaining paths all take the mad_agent_lock. 
*/ - spin_lock(&cm_dev->mad_agent_lock); + write_lock(&cm_dev->mad_agent_lock); port->mad_agent = NULL; - spin_unlock(&cm_dev->mad_agent_lock); + write_unlock(&cm_dev->mad_agent_lock); ib_unregister_mad_agent(mad_agent); ib_port_unregister_client_groups(ib_device, i, cm_counter_groups); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fedcdb56fb6b7c..274cfbd5aaba76 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,6 +72,8 @@ static const char * const cma_events[] = { static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, enum ib_gid_type gid_type); +static void cma_netevent_work_handler(struct work_struct *_work); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -1047,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler); rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); if (parent) @@ -5241,9 +5244,9 @@ static int cma_netevent_callback(struct notifier_block *self, if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, neigh->ha, ETH_ALEN)) continue; - INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); cma_id_get(current_id); - queue_work(cma_wq, ¤t_id->id.net_work); + if (!queue_work(cma_wq, ¤t_id->id.net_work)) + cma_id_put(current_id); } out: spin_unlock_irqrestore(&id_table_lock, flags); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b4e3e4beb7f455..d4263385850a7a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1352,6 +1352,9 @@ static void ib_device_notify_register(struct ib_device *device) down_read(&devices_rwsem); + /* Mark for userspace that device is ready */ + 
kobject_uevent(&device->dev.kobj, KOBJ_ADD); + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); if (ret) goto out; @@ -1468,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name, return ret; } dev_set_uevent_suppress(&device->dev, false); - /* Mark for userspace that device is ready */ - kobject_uevent(&device->dev.kobj, KOBJ_ADD); ib_device_notify_register(device); + ib_device_put(device); return 0; diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c index 6853c6d078f91e..de5cb8bf0a6132 100644 --- a/drivers/infiniband/core/ucaps.c +++ b/drivers/infiniband/core/ucaps.c @@ -170,7 +170,7 @@ int ib_create_ucap(enum rdma_user_cap type) ucap->dev.class = &ucaps_class; ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type); ucap->dev.release = ucap_dev_release; - ret = dev_set_name(&ucap->dev, ucap_names[type]); + ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]); if (ret) goto err_device; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e9fa22d31c2332..c48ef608302055 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, npfns = (end - start) >> PAGE_SHIFT; umem_odp->pfn_list = kvcalloc( - npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL); + npfns, sizeof(*umem_odp->pfn_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->pfn_list) return -ENOMEM; umem_odp->dma_list = kvcalloc( - ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL); + ndmas, sizeof(*umem_odp->dma_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->dma_list) { ret = -ENOMEM; goto out_pfn_list; diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c index af91d16c3c77f5..e632f1661b9295 100644 --- a/drivers/infiniband/hw/bnxt_re/debugfs.c +++ b/drivers/infiniband/hw/bnxt_re/debugfs.c @@ -170,6 +170,9 @@ static int map_cc_config_offset_gen0_ext0(u32 
offset, struct bnxt_qplib_cc_param case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP: *val = ccparam->tcp_cp; break; + case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: + *val = ccparam->inact_th; + break; default: return -EINVAL; } @@ -203,7 +206,7 @@ static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer, return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc); } -static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val) +static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val) { u32 modify_mask; @@ -247,7 +250,9 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs ccparam->tcp_cp = val; break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE: + return -EOPNOTSUPP; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP: + ccparam->inact_th = val; break; case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE: ccparam->time_pph = val; @@ -258,17 +263,20 @@ static void bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offs } ccparam->mask = modify_mask; + return 0; } static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val) { struct bnxt_qplib_cc_param ccparam = { }; + int rc; - /* Supporting only Gen 0 now */ - if (gen_ext == CC_CONFIG_GEN0_EXT0) - bnxt_re_fill_gen0_ext0(&ccparam, offset, val); - else - return -EINVAL; + if (gen_ext != CC_CONFIG_GEN0_EXT0) + return -EOPNOTSUPP; + + rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val); + if (rc) + return rc; bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9082b3fd2b4729..063801384b2b04 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1774,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct 
bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; - struct bnxt_qplib_nq *nq = NULL; - if (qplib_srq->cq) - nq = qplib_srq->cq->nq; if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) { free_page((unsigned long)srq->uctx_srq_page); hash_del(&srq->hash_entry); @@ -1785,8 +1782,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); atomic_dec(&rdev->stats.res.srq_count); - if (nq) - nq->budget--; return 0; } @@ -1827,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; - struct bnxt_qplib_nq *nq = NULL; struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; @@ -1873,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; - nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); @@ -1908,8 +1901,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto fail; } } - if (nq) - nq->budget++; active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); if (active_srqs > rdev->stats.res.srq_watermark) rdev->stats.res.srq_watermark = active_srqs; @@ -3079,7 +3070,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ib_umem_release(cq->umem); atomic_dec(&rdev->stats.res.cq_count); - nq->budget--; kfree(cq->cql); return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 4fc5b9d5fea87e..307c35888b3003 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -33,7 +33,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 
160e8927d364e1..59352d1b62099f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -43,7 +43,6 @@ #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 91a5665465ffba..bc7466830eaf9d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_HW_V2_H #include +#include "hnae3.h" #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index cf89a8db4f64cd..e7a497cc125cc3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,7 +37,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" @@ -763,7 +762,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 356d9881694973..f637b73b946e44 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -4,7 +4,6 @@ #include #include #include -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 1ee8969595d3d6..7599e31b574369 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -221,7 +221,7 @@ static int 
irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf) break; if (i < IRDMA_MIN_MSIX) { - for (; i > 0; i--) + while (--i >= 0) ice_free_rdma_qvector(pf, &rf->msix_entries[i]); kfree(rf->msix_entries); @@ -255,6 +255,8 @@ static void irdma_remove(struct auxiliary_device *aux_dev) ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false); irdma_deinit_interrupts(iwdev->rf, pf); + kfree(iwdev->rf); + pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index eeb932e5873036..1e8c92826de221 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev) irdma_rt_deinit_hw(iwdev); irdma_ctrl_deinit_hw(iwdev->rf); - kfree(iwdev->rf); } diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 251246c73b339b..0ff9f18a71e828 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -3461,7 +3461,6 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -3472,7 +3471,6 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), -#endif {}, }; diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d3dcc272200afa..146d03ae40bd9f 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); - if (common) + if (common && !common->invalid) 
refcount_inc(&common->refcount); + else + common = NULL; spin_unlock_irqrestore(&table->lock, flags); @@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev, return 0; } +static void modify_resource_common_state(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *qp, + bool invalid) +{ + struct mlx5_qp_table *table = &dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + qp->common.invalid = invalid; + spin_unlock_irqrestore(&table->lock, flags); +} + static void destroy_resource_common(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) { @@ -609,8 +623,20 @@ int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, struct mlx5_core_qp *rq) { + int ret; + + /* The rq destruction can be called again in case it fails, hence we + * mark the common resource as invalid and only once FW destruction + * is completed successfully we actually destroy the resources. + */ + modify_resource_common_state(dev, rq, true); + ret = destroy_rq_tracked(dev, rq->qpn, rq->uid); + if (ret) { + modify_resource_common_state(dev, rq, false); + return ret; + } destroy_resource_common(dev, rq); - return destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 4ddcd5860e0fa4..11eca39b73a93e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -397,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -517,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); 
- if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -586,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index fec87c9030abdc..fffd144d509eb0 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); - if (err) { - vfree(cq->queue->buf); - kfree(cq->queue); + if (err) return err; - } cq->is_user = uresp; diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index feb386d98d1da6..0bc3fbb6554f4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -140,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } +static inline bool is_odp_mr(struct rxe_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 868d2f0b74e967..432d864c3ce9c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -323,7 +323,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - if (mr->umem->is_odp) + if (is_odp_mr(mr)) 
return rxe_odp_mr_copy(mr, iova, addr, length, dir); else return rxe_mr_copy_xarray(mr, iova, addr, length, dir); @@ -536,7 +536,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) u64 *va; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* See IBA oA19-28 */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 7975fb0e2782f0..f2af3e0aef35b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -811,7 +811,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) spin_unlock_irqrestore(&qp->state_lock, flags); qp->qp_timeout_jiffies = 0; - if (qp_type(qp) == IB_QPT_RC) { + /* In the function timer_setup, .function is initialized. If .function + * is NULL, it indicates the function timer_setup is not called, the + * timer is not initialized. Or else, the timer is initialized. + */ + if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function && + qp->rnr_nak_timer.function) { timer_delete_sync(&qp->retrans_timer); timer_delete_sync(&qp->rnr_nak_timer); } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 54ba9ee1acc598..5d9174e408db44 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -650,7 +650,7 @@ static enum resp_states process_flush(struct rxe_qp *qp, struct resp_res *res = qp->resp.res; /* ODP is not supported right now. WIP. 
*/ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* oA19-14, oA19-15 */ @@ -706,7 +706,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, if (!res->replay) { u64 iova = qp->resp.va + qp->resp.offset; - if (mr->umem->is_odp) + if (is_odp_mr(mr)) err = rxe_odp_atomic_op(mr, iova, pkt->opcode, atmeth_comp(pkt), atmeth_swap_add(pkt), diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f600..a0f7fa1518c660 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -46,6 +46,7 @@ struct evdev_client { struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; + struct rcu_head rcu; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; @@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev, spin_unlock(&evdev->client_lock); } +static void evdev_reclaim_client(struct rcu_head *rp) +{ + struct evdev_client *client = container_of(rp, struct evdev_client, rcu); + unsigned int i; + for (i = 0; i < EV_CNT; ++i) + bitmap_free(client->evmasks[i]); + kvfree(client); +} + static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); - synchronize_rcu(); + call_rcu(&client->rcu, evdev_reclaim_client); } static int evdev_open_device(struct evdev *evdev) @@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - unsigned int i; mutex_lock(&evdev->mutex); @@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_detach_client(evdev, client); - for (i = 0; i < EV_CNT; ++i) - bitmap_free(client->evmasks[i]); - - kvfree(client); - evdev_close_device(evdev); return 0; @@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file) err_free_client: evdev_detach_client(evdev, 
client); - kvfree(client); return error; } diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c25..7622638e5bb8e0 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:Kdev; bool do_sync = false; @@ -1068,8 +1080,12 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); - if (xpad->mapping & MAP_SELECT_BUTTON) - input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); + if (xpad->mapping & MAP_SHARE_BUTTON) { + if (xpad->mapping & MAP_SHARE_OFFSET) + input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); + else + input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); + } /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); @@ -1217,7 +1233,7 @@ static void xpad_irq_in(struct urb *urb) xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: - xpadone_process_packet(xpad, 0, xpad->idata); + xpadone_process_packet(xpad, 0, xpad->idata, urb->actual_length); break; default: xpad_process_packet(xpad, 0, xpad->idata); @@ -1944,7 +1960,7 @@ static int xpad_init_input(struct usb_xpad *xpad) xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); - if (xpad->mapping & MAP_SELECT_BUTTON) + if (xpad->mapping & MAP_SHARE_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c index 5ad6be9141603a..061d48350df661 100644 --- a/drivers/input/keyboard/mtk-pmic-keys.c +++ b/drivers/input/keyboard/mtk-pmic-keys.c @@ -147,8 +147,8 @@ static void 
mtk_pmic_keys_lp_reset_setup(struct mtk_pmic_keys *keys, u32 value, mask; int error; - kregs_home = keys->keys[MTK_PMIC_HOMEKEY_INDEX].regs; - kregs_pwr = keys->keys[MTK_PMIC_PWRKEY_INDEX].regs; + kregs_home = ®s->keys_regs[MTK_PMIC_HOMEKEY_INDEX]; + kregs_pwr = ®s->keys_regs[MTK_PMIC_PWRKEY_INDEX]; error = of_property_read_u32(keys->dev->of_node, "power-off-time-sec", &long_press_debounce); diff --git a/drivers/input/misc/hisi_powerkey.c b/drivers/input/misc/hisi_powerkey.c index d3c293a95d322e..d315017324d93c 100644 --- a/drivers/input/misc/hisi_powerkey.c +++ b/drivers/input/misc/hisi_powerkey.c @@ -30,7 +30,7 @@ static irqreturn_t hi65xx_power_press_isr(int irq, void *q) { struct input_dev *input = q; - pm_wakeup_event(input->dev.parent, MAX_HELD_TIME); + pm_wakeup_dev_event(input->dev.parent, MAX_HELD_TIME, true); input_report_key(input, KEY_POWER, 1); input_sync(input); diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c index 8d7303fc13bce3..1cfadd73829f83 100644 --- a/drivers/input/misc/sparcspkr.c +++ b/drivers/input/misc/sparcspkr.c @@ -74,9 +74,14 @@ static int bbc_spkr_event(struct input_dev *dev, unsigned int type, unsigned int return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) @@ -109,9 +114,14 @@ static int grover_spkr_event(struct input_dev *dev, unsigned int type, unsigned return -1; switch (code) { - case SND_BELL: if (value) value = 1000; - case SND_TONE: break; - default: return -1; + case SND_BELL: + if (value) + value = 1000; + break; + case SND_TONE: + break; + default: + return -1; } if (value > 20 && value < 32767) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 309c360aab5597..c5c88a75a019ec 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ 
-164,6 +164,7 @@ static const char * const topbuttonpad_pnp_ids[] = { #ifdef CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ + "DLL060d", /* Dell Precision M3800 */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ "LEN0049", /* Yoga 11e */ @@ -190,11 +191,15 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN1221", /* TUXEDO InfinityBook Pro 14 v5 */ + "SYN3003", /* HP EliteBook 850 G1 */ "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "TOS01f6", /* Dynabook Portege X30L-G */ + "TOS0213", /* Dynabook Portege X30-D */ NULL }; #endif diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c index d760af4cc12efb..f1947f03b06af6 100644 --- a/drivers/input/rmi4/rmi_f34.c +++ b/drivers/input/rmi4/rmi_f34.c @@ -4,6 +4,7 @@ * Copyright (C) 2016 Zodiac Inflight Innovations */ +#include "linux/device.h" #include #include #include @@ -289,39 +290,30 @@ static int rmi_f34_update_firmware(struct f34_data *f34, return rmi_f34_flash_firmware(f34, syn_fw); } -static int rmi_f34_status(struct rmi_function *fn) -{ - struct f34_data *f34 = dev_get_drvdata(&fn->dev); - - /* - * The status is the percentage complete, or once complete, - * zero for success or a negative return code. 
- */ - return f34->update_status; -} - static ssize_t rmi_driver_bootloader_id_show(struct device *dev, struct device_attribute *dattr, char *buf) { struct rmi_driver_data *data = dev_get_drvdata(dev); - struct rmi_function *fn = data->f34_container; + struct rmi_function *fn; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); - - if (f34->bl_version == 5) - return sysfs_emit(buf, "%c%c\n", - f34->bootloader_id[0], - f34->bootloader_id[1]); - else - return sysfs_emit(buf, "V%d.%d\n", - f34->bootloader_id[1], - f34->bootloader_id[0]); - } + fn = data->f34_container; + if (!fn) + return -ENODEV; - return 0; + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; + + if (f34->bl_version == 5) + return sysfs_emit(buf, "%c%c\n", + f34->bootloader_id[0], + f34->bootloader_id[1]); + else + return sysfs_emit(buf, "V%d.%d\n", + f34->bootloader_id[1], + f34->bootloader_id[0]); } static DEVICE_ATTR(bootloader_id, 0444, rmi_driver_bootloader_id_show, NULL); @@ -334,13 +326,16 @@ static ssize_t rmi_driver_configuration_id_show(struct device *dev, struct rmi_function *fn = data->f34_container; struct f34_data *f34; - if (fn) { - f34 = dev_get_drvdata(&fn->dev); + fn = data->f34_container; + if (!fn) + return -ENODEV; + + f34 = dev_get_drvdata(&fn->dev); + if (!f34) + return -ENODEV; - return sysfs_emit(buf, "%s\n", f34->configuration_id); - } - return 0; + return sysfs_emit(buf, "%s\n", f34->configuration_id); } static DEVICE_ATTR(configuration_id, 0444, @@ -356,10 +351,14 @@ static int rmi_firmware_update(struct rmi_driver_data *data, if (!data->f34_container) { dev_warn(dev, "%s: No F34 present!\n", __func__); - return -EINVAL; + return -ENODEV; } f34 = dev_get_drvdata(&data->f34_container->dev); + if (!f34) { + dev_warn(dev, "%s: No valid F34 present!\n", __func__); + return -ENODEV; + } if (f34->bl_version >= 7) { if (data->pdt_props & HAS_BSR) { @@ -485,10 +484,18 @@ static ssize_t rmi_driver_update_fw_status_show(struct device *dev, char *buf) { 
struct rmi_driver_data *data = dev_get_drvdata(dev); - int update_status = 0; + struct f34_data *f34; + int update_status = -ENODEV; - if (data->f34_container) - update_status = rmi_f34_status(data->f34_container); + /* + * The status is the percentage complete, or once complete, + * zero for success or a negative return code. + */ + if (data->f34_container) { + f34 = dev_get_drvdata(&data->f34_container->dev); + if (f34) + update_status = f34->update_status; + } return sysfs_emit(buf, "%d\n", update_status); } @@ -508,33 +515,21 @@ static const struct attribute_group rmi_firmware_attr_group = { .attrs = rmi_firmware_attrs, }; -static int rmi_f34_probe(struct rmi_function *fn) +static int rmi_f34v5_probe(struct f34_data *f34) { - struct f34_data *f34; - unsigned char f34_queries[9]; + struct rmi_function *fn = f34->fn; + u8 f34_queries[9]; bool has_config_id; - u8 version = fn->fd.function_version; - int ret; - - f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); - if (!f34) - return -ENOMEM; - - f34->fn = fn; - dev_set_drvdata(&fn->dev, f34); - - /* v5 code only supported version 0, try V7 probe */ - if (version > 0) - return rmi_f34v7_probe(f34); + int error; f34->bl_version = 5; - ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { dev_err(&fn->dev, "%s: Failed to query properties\n", __func__); - return ret; + return error; } snprintf(f34->bootloader_id, sizeof(f34->bootloader_id), @@ -560,11 +555,11 @@ static int rmi_f34_probe(struct rmi_function *fn) f34->v5.config_blocks); if (has_config_id) { - ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, - f34_queries, sizeof(f34_queries)); - if (ret) { + error = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, + f34_queries, sizeof(f34_queries)); + if (error) { dev_err(&fn->dev, "Failed to read F34 config ID\n"); - 
return ret; + return error; } snprintf(f34->configuration_id, sizeof(f34->configuration_id), @@ -573,12 +568,34 @@ static int rmi_f34_probe(struct rmi_function *fn) f34_queries[2], f34_queries[3]); rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n", - f34->configuration_id); + f34->configuration_id); } return 0; } +static int rmi_f34_probe(struct rmi_function *fn) +{ + struct f34_data *f34; + u8 version = fn->fd.function_version; + int error; + + f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); + if (!f34) + return -ENOMEM; + + f34->fn = fn; + + /* v5 code only supported version 0 */ + error = version == 0 ? rmi_f34v5_probe(f34) : rmi_f34v7_probe(f34); + if (error) + return error; + + dev_set_drvdata(&fn->dev, f34); + + return 0; +} + int rmi_f34_create_sysfs(struct rmi_device *rmi_dev) { return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group); diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index eafe5a9b896484..071b7c9bf566eb 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -580,7 +580,7 @@ static int cyttsp5_power_control(struct cyttsp5 *ts, bool on) int rc; SET_CMD_REPORT_TYPE(cmd[0], 0); - SET_CMD_REPORT_ID(cmd[0], HID_POWER_SLEEP); + SET_CMD_REPORT_ID(cmd[0], state); SET_CMD_OPCODE(cmd[1], HID_CMD_SET_POWER); rc = cyttsp5_write(ts, HID_COMMAND_REG, cmd, sizeof(cmd)); @@ -870,13 +870,16 @@ static int cyttsp5_probe(struct device *dev, struct regmap *regmap, int irq, ts->input->phys = ts->phys; input_set_drvdata(ts->input, ts); - /* Reset the gpio to be in a reset state */ + /* Assert gpio to be in a reset state */ ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(ts->reset_gpio)) { error = PTR_ERR(ts->reset_gpio); dev_err(dev, "Failed to request reset gpio, error %d\n", error); return error; } + + fsleep(10); /* Ensure long-enough reset pulse (minimum 10us). 
*/ + gpiod_set_value_cansleep(ts->reset_gpio, 0); /* Need a delay to have device up */ diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c index a94a1997f96b7d..af0fb38bcfdcd2 100644 --- a/drivers/input/touchscreen/stmpe-ts.c +++ b/drivers/input/touchscreen/stmpe-ts.c @@ -366,12 +366,7 @@ static struct platform_driver stmpe_ts_driver = { }; module_platform_driver(stmpe_ts_driver); -static const struct of_device_id stmpe_ts_ids[] = { - { .compatible = "st,stmpe-ts", }, - { }, -}; -MODULE_DEVICE_TABLE(of, stmpe_ts_ids); - +MODULE_ALIAS("platform:stmpe-ts"); MODULE_AUTHOR("Luotao Fu "); MODULE_DESCRIPTION("STMPEXXX touchscreen driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index cd750f512deee2..bad585b45a31df 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -199,7 +199,6 @@ source "drivers/iommu/riscv/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI - select DMAR_TABLE if INTEL_IOMMU help Supports Interrupt remapping for IO-APIC and MSI devices. 
To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b71848..14aa0d77df26df 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3664,6 +3664,14 @@ static int __init parse_ivrs_acpihid(char *str) while (*uid == '0' && *(uid + 1)) uid++; + if (strlen(hid) >= ACPIHID_HID_LEN) { + pr_err("Invalid command line: hid is too long\n"); + return 1; + } else if (strlen(uid) >= ACPIHID_UID_LEN) { + pr_err("Invalid command line: uid is too long\n"); + return 1; + } + i = early_acpihid_map_size++; memcpy(early_acpihid_map[i].hid, hid, strlen(hid)); memcpy(early_acpihid_map[i].uid, uid, strlen(uid)); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index be8761bbef0ffb..f34209b08b4c54 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3869,6 +3869,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + if (ir_data->iommu == NULL) return -EINVAL; @@ -3879,21 +3882,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) * we should not modify the IRTE */ if (!dev_data || !dev_data->use_vapic) - return 0; + return -EINVAL; ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; - /* Note: - * SVM tries to set up for VAPIC mode, but we are in - * legacy mode. So, we force legacy mode instead. 
- */ - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) { - pr_debug("%s: Fall back to using intr legacy remap\n", - __func__); - pi_data->is_guest_mode = false; - } - pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { ir_data->ga_root_ptr = (pi_data->base >> 12); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 9ba596430e7cf9..980cc6b33c430f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -411,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return ERR_CAST(smmu_domain); smmu_domain->domain.type = IOMMU_DOMAIN_SVA; smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + + /* + * Choose page_size as the leaf page size for invalidation when + * ARM_SMMU_FEAT_RANGE_INV is present + */ + smmu_domain->domain.pgsize_bitmap = PAGE_SIZE; smmu_domain->smmu = smmu; ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b4c21aaed1266a..be8d0f7db617d0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2953,7 +2953,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) smmu = master->smmu; if (smmu_domain->smmu != smmu) - return ret; + return -EINVAL; if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); @@ -3388,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, mutex_lock(&smmu->streams_mutex); for (i = 0; i < fwspec->num_ids; i++) { struct arm_smmu_stream *new_stream = &master->streams[i]; + struct rb_node *existing; u32 sid = fwspec->ids[i]; new_stream->id = sid; @@ -3398,11 +3399,21 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu, break; /* Insert into SID tree */ - if 
(rb_find_add(&new_stream->node, &smmu->streams, - arm_smmu_streams_cmp_node)) { - dev_warn(master->dev, "stream %u already in tree\n", - sid); - ret = -EINVAL; + existing = rb_find_add(&new_stream->node, &smmu->streams, + arm_smmu_streams_cmp_node); + if (existing) { + struct arm_smmu_master *existing_master = + rb_entry(existing, struct arm_smmu_stream, node) + ->master; + + /* Bridged PCI devices may end up with duplicated IDs */ + if (existing_master == master) + continue; + + dev_warn(master->dev, + "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n", + sid, dev_name(existing_master->dev)); + ret = -ENODEV; break; } } @@ -4429,6 +4440,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + if (FIELD_GET(IDR3_FWB, reg)) + smmu->features |= ARM_SMMU_FEAT_S2FWB; /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index d525ab43a4aebf..dd7d030d2e8909 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) /* VCMDQ Resource Helpers */ -static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) -{ - struct arm_smmu_queue *q = &vcmdq->cmdq.q; - size_t nents = 1 << q->llq.max_n_shift; - size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; - - if (!q->base) - return; - dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); -} - static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) { struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; @@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; char header[64]; - 
tegra241_vcmdq_free_smmu_cmdq(vcmdq); + /* Note that the lvcmdq queue memory space is managed by devres */ + tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, @@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); if (!vintf) - goto out_fallback; + return -ENOMEM; /* Init VINTF0 for in-kernel use */ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); if (ret) { dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; + return ret; } /* Preallocate logical VCMDQs to VINTF0 */ @@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); if (IS_ERR(vcmdq)) - goto free_lvcmdq; + return PTR_ERR(vcmdq); } /* Now, we are ready to run all the impl ops */ smmu->impl_ops = &tegra241_cmdqv_impl_ops; return 0; - -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -out_fallback: - dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); - smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; - tegra241_cmdqv_remove(smmu); - return 0; } #ifdef CONFIG_IOMMU_DEBUGFS diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index cb7e29dcac15ac..a775e4dbe06f0d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1754,7 +1754,7 @@ static size_t cookie_msi_granule(const struct iommu_domain *domain) return PAGE_SIZE; default: BUG(); - }; + } } static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) @@ -1766,7 +1766,7 @@ static struct list_head *cookie_msi_pages(const struct iommu_domain *domain) return &domain->msi_cookie->msi_page_list; default: BUG(); - }; + } } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c 
index 69e23e017d9e5f..317266aca6e28e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "saving state\n"); __sysmmu_disable(data); } @@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev) struct exynos_iommu_owner *owner = dev_iommu_priv_get(master); mutex_lock(&owner->rpm_lock); - if (&data->domain->domain != &exynos_identity_domain) { + if (data->domain) { dev_dbg(data->sysmmu, "restoring state\n"); __sysmmu_enable(data); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6e67cc66a204c6..cb0b993bebb4dd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3785,6 +3785,22 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; +free_table: + intel_pasid_free_table(dev); +clear_rbtree: + device_rbtree_remove(info); +free: + kfree(info); + + return ERR_PTR(ret); +} + +static void intel_iommu_probe_finalize(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + /* * The PCIe spec, in its wisdom, declares that the behaviour of the * device is undefined if you enable PASID support after ATS support. @@ -3792,22 +3808,12 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) * we can't yet know if we're ever going to use it. 
*/ if (info->pasid_supported && - !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) iommu_enable_pci_ats(info); iommu_enable_pci_pri(info); - - return &iommu->iommu; -free_table: - intel_pasid_free_table(dev); -clear_rbtree: - device_rbtree_remove(info); -free: - kfree(info); - - return ERR_PTR(ret); } static void intel_iommu_release_device(struct device *dev) @@ -3835,7 +3841,6 @@ static void intel_iommu_release_device(struct device *dev) intel_pasid_free_table(dev); intel_iommu_debugfs_remove_dev(info); kfree(info); - set_dma_ops(dev, NULL); } static void intel_iommu_get_resv_regions(struct device *device, @@ -4392,6 +4397,7 @@ const struct iommu_ops intel_iommu_ops = { .domain_alloc_sva = intel_svm_domain_alloc, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, + .probe_finalize = intel_iommu_probe_finalize, .release_device = intel_iommu_release_device, .get_resv_regions = intel_iommu_get_resv_regions, .device_group = intel_iommu_device_group, @@ -4433,6 +4439,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx); +/* QM57/QS57 integrated gfx malfunctions with dmar */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx); + /* Broadwell igfx malfunctions with dmar */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx); @@ -4510,7 +4519,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, 
quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index ea3ca520391962..3bc2a03ccecaae 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1287,43 +1287,44 @@ static struct irq_chip intel_ir_chip = { }; /* - * With posted MSIs, all vectors are multiplexed into a single notification - * vector. Devices MSIs are then dispatched in a demux loop where - * EOIs can be coalesced as well. + * With posted MSIs, the MSI vectors are multiplexed into a single notification + * vector, and only the notification vector is sent to the APIC IRR. Device + * MSIs are then dispatched in a demux loop that harvests the MSIs from the + * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to + * the APIC IRR, and thus do not need an EOI. The notification handler instead + * performs a single EOI after processing the PIR. * - * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() - * function. Instead EOI is performed by the posted interrupt notification - * handler. + * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be + * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only - * one apic_eoi() is needed. + * one apic_eoi() is needed, but each MSI needs to process pending changes to + * its CPU affinity. 
* * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() + * */ - -static void dummy_ack(struct irq_data *d) { } - static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = dummy_ack, + .irq_ack = irq_move_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7632c80edea63a..396c4f6f5a5bd9 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1433,15 +1434,17 @@ static int __init arm_lpae_do_selftests(void) }; int i, j, k, pass = 0, fail = 0; - struct device dev; + struct faux_device *dev; struct io_pgtable_cfg cfg = { .tlb = &dummy_tlb_ops, .coherent_walk = true, - .iommu_dev = &dev, }; - /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */ - set_dev_node(&dev, NUMA_NO_NODE); + dev = faux_device_create("io-pgtable-test", NULL, 0); + if (!dev) + return -ENOMEM; + + cfg.iommu_dev = &dev->dev; for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { for (j = 0; j < ARRAY_SIZE(address_size); ++j) { @@ -1461,6 +1464,8 @@ static int __init arm_lpae_do_selftests(void) } pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); + faux_device_destroy(dev); + return fail ? 
-EFAULT : 0; } subsys_initcall(arm_lpae_do_selftests); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c8033ca6637771..e4628d96216102 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -277,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu, err = bus_iommu_probe(iommu_buses[i]); if (err) iommu_device_unregister(iommu); + else + WRITE_ONCE(iommu->ready, true); return err; } EXPORT_SYMBOL_GPL(iommu_device_register); @@ -422,13 +424,15 @@ static int iommu_init_device(struct device *dev) * is buried in the bus dma_configure path. Properly unpicking that is * still a big job, so for now just invoke the whole thing. The device * already having a driver bound means dma_configure has already run and - * either found no IOMMU to wait for, or we're in its replay call right - * now, so either way there's no point calling it again. + * found no IOMMU to wait for, so there's no point calling it again. */ - if (!dev->driver && dev->bus->dma_configure) { + if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) { mutex_unlock(&iommu_probe_device_lock); dev->bus->dma_configure(dev); mutex_lock(&iommu_probe_device_lock); + /* If another instance finished the job for us, skip it */ + if (!dev->iommu || dev->iommu_group) + return -ENODEV; } /* * At this point, relevant devices either now have a fwspec which will @@ -538,6 +542,9 @@ static void iommu_deinit_device(struct device *dev) dev->iommu_group = NULL; module_put(ops->owner); dev_iommu_free(dev); +#ifdef CONFIG_IOMMU_DMA + dev->dma_iommu = false; +#endif } static struct iommu_domain *pasid_array_entry_to_domain(void *entry) @@ -2392,6 +2399,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, unsigned int pgsize_idx, pgsize_idx_next; unsigned long pgsizes; size_t offset, pgsize, pgsize_next; + size_t offset_end; unsigned long addr_merge = paddr | iova; /* Page sizes supported by the hardware and small enough for @size */ @@ -2432,7 +2440,8 @@ 
static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova, * If size is big enough to accommodate the larger page, reduce * the number of smaller pages. */ - if (offset + pgsize_next <= size) + if (!check_add_overflow(offset, pgsize_next, &offset_end) && + offset_end <= size) size = offset; out_set_count: @@ -2717,7 +2726,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev, * if upper layers showed interest and installed a fault handler, * invoke it. */ - if (domain->handler) + if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER && + domain->handler) ret = domain->handler(domain, dev, iova, flags, domain->handler_token); @@ -2826,31 +2836,39 @@ bool iommu_default_passthrough(void) } EXPORT_SYMBOL_GPL(iommu_default_passthrough); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode) { - const struct iommu_ops *ops = NULL; - struct iommu_device *iommu; + const struct iommu_device *iommu, *ret = NULL; spin_lock(&iommu_device_lock); list_for_each_entry(iommu, &iommu_device_list, list) if (iommu->fwnode == fwnode) { - ops = iommu->ops; + ret = iommu; break; } spin_unlock(&iommu_device_lock); - return ops; + return ret; +} + +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) +{ + const struct iommu_device *iommu = iommu_from_fwnode(fwnode); + + return iommu ? iommu->ops : NULL; } int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { - const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); + const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - if (!ops) + if (!iommu) return driver_deferred_probe_check_state(dev); + if (!dev->iommu && !READ_ONCE(iommu->ready)) + return -EPROBE_DEFER; if (fwspec) - return ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; + return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; @@ -3362,10 +3380,12 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, int ret; for_each_group_device(group, device) { - ret = domain->ops->set_dev_pasid(domain, device->dev, - pasid, old); - if (ret) - goto err_revert; + if (device->dev->iommu->max_pasids > 0) { + ret = domain->ops->set_dev_pasid(domain, device->dev, + pasid, old); + if (ret) + goto err_revert; + } } return 0; @@ -3375,15 +3395,18 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain, for_each_group_device(group, device) { if (device == last_gdev) break; - /* - * If no old domain, undo the succeeded devices/pasid. - * Otherwise, rollback the succeeded devices/pasid to the old - * domain. And it is a driver bug to fail attaching with a - * previously good domain. - */ - if (!old || WARN_ON(old->ops->set_dev_pasid(old, device->dev, + if (device->dev->iommu->max_pasids > 0) { + /* + * If no old domain, undo the succeeded devices/pasid. + * Otherwise, rollback the succeeded devices/pasid to + * the old domain. And it is a driver bug to fail + * attaching with a previously good domain. 
+ */ + if (!old || + WARN_ON(old->ops->set_dev_pasid(old, device->dev, pasid, domain))) - iommu_remove_dev_pasid(device->dev, pasid, domain); + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } return ret; } @@ -3394,8 +3417,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, { struct group_device *device; - for_each_group_device(group, device) - iommu_remove_dev_pasid(device->dev, pasid, domain); + for_each_group_device(group, device) { + if (device->dev->iommu->max_pasids > 0) + iommu_remove_dev_pasid(device->dev, pasid, domain); + } } /* @@ -3436,7 +3461,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, mutex_lock(&group->mutex); for_each_group_device(group, device) { - if (pasid >= device->dev->iommu->max_pasids) { + /* + * Skip PASID validation for devices without PASID support + * (max_pasids = 0). These devices cannot issue transactions + * with PASID, so they don't affect group's PASID usage. + */ + if ((device->dev->iommu->max_pasids > 0) && + (pasid >= device->dev->iommu->max_pasids)) { ret = -EINVAL; goto out_unlock; } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 074daf1aac4e4c..90341b24a81155 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1081,31 +1081,25 @@ static int ipmmu_probe(struct platform_device *pdev) } } + platform_set_drvdata(pdev, mmu); /* * Register the IPMMU to the IOMMU subsystem in the following cases: * - R-Car Gen2 IPMMU (all devices registered) * - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device) */ - if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) { - ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); - if (ret) - return ret; - - ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); - if (ret) - return ret; - } + if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu)) + return 0; - /* - * We can't create the ARM mapping here as it requires the bus 
to have - * an IOMMU, which only happens when bus_set_iommu() is called in - * ipmmu_init() after the probe function returns. - */ + ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s", + dev_name(&pdev->dev)); + if (ret) + return ret; - platform_set_drvdata(pdev, mmu); + ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev); + if (ret) + iommu_device_sysfs_remove(&mmu->iommu); - return 0; + return ret; } static void ipmmu_remove(struct platform_device *pdev) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 034b0e670384a2..df98d0c65f5469 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -1372,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); mutex_init(&data->mutex); - ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, - "mtk-iommu.%pa", &ioaddr); - if (ret) - goto out_link_remove; - - ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); - if (ret) - goto out_sysfs_remove; - if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) { list_add_tail(&data->list, data->plat_data->hw_list); data->hw_list = data->plat_data->hw_list; @@ -1390,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev) data->hw_list = &data->hw_list_head; } + ret = iommu_device_sysfs_add(&data->iommu, dev, NULL, + "mtk-iommu.%pa", &ioaddr); + if (ret) + goto out_list_del; + + ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev); + if (ret) + goto out_sysfs_remove; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match); if (ret) - goto out_list_del; + goto out_device_unregister; } return ret; -out_list_del: - list_del(&data->list); +out_device_unregister: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); -out_link_remove: +out_list_del: + list_del(&data->list); if (MTK_IOMMU_IS_TYPE(data->plat_data, 
MTK_IOMMU_TYPE_MM)) device_link_remove(data->smicomm_dev, dev); out_runtime_disable: diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index cec05e443083b8..08bb3b031f2309 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -114,8 +114,8 @@ config I8259 config BCM2712_MIP tristate "Broadcom BCM2712 MSI-X Interrupt Peripheral support" - depends on ARCH_BRCMSTB || COMPILE_TEST - default m if ARCH_BRCMSTB + depends on ARCH_BRCMSTB || ARCH_BCM2835 || COMPILE_TEST + default m if ARCH_BRCMSTB || ARCH_BCM2835 depends on ARM_GIC select GENERIC_IRQ_CHIP select IRQ_DOMAIN_HIERARCHY diff --git a/drivers/irqchip/irq-bcm2712-mip.c b/drivers/irqchip/irq-bcm2712-mip.c index 49a19db2d1e1b3..4cce24233f0f34 100644 --- a/drivers/irqchip/irq-bcm2712-mip.c +++ b/drivers/irqchip/irq-bcm2712-mip.c @@ -163,6 +163,7 @@ static const struct irq_domain_ops mip_middle_domain_ops = { static const struct msi_parent_ops mip_msi_parent_ops = { .supported_flags = MIP_MSI_FLAGS_SUPPORTED, .required_flags = MIP_MSI_FLAGS_REQUIRED, + .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PCI_MSI, .prefix = "MIP-MSI-", diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c6989486186667..cc6a6c1585d20b 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -252,7 +252,7 @@ static void __init gicv2m_teardown(void) static struct msi_parent_ops gicv2m_msi_parent_ops = { .supported_flags = GICV2M_MSI_FLAGS_SUPPORTED, .required_flags = GICV2M_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "GICv2m-", @@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle 
*gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; diff --git a/drivers/irqchip/irq-gic-v3-its-msi-parent.c b/drivers/irqchip/irq-gic-v3-its-msi-parent.c index bdb04c8081480d..c5a7eb1c041959 100644 --- a/drivers/irqchip/irq-gic-v3-its-msi-parent.c +++ b/drivers/irqchip/irq-gic-v3-its-msi-parent.c @@ -203,7 +203,7 @@ static bool its_init_dev_msi_info(struct device *dev, struct irq_domain *domain, const struct msi_parent_ops gic_v3_its_msi_parent_ops = { .supported_flags = ITS_MSI_FLAGS_SUPPORTED, .required_flags = ITS_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "ITS-", diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 34e9ca77a8c368..647b18e24e0c2d 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -197,7 +197,7 @@ static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, static const struct msi_parent_ops gic_v3_mbi_msi_parent_ops = { .supported_flags = MBI_MSI_FLAGS_SUPPORTED, .required_flags = MBI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_NEXUS, .bus_select_mask = MATCH_PCI_MSI | MATCH_PLATFORM_MSI, .prefix = "MBI-", diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c index d67f93f6d75056..60b976286636f2 100644 --- a/drivers/irqchip/irq-mvebu-gicp.c +++ b/drivers/irqchip/irq-mvebu-gicp.c @@ -161,7 +161,7 @@ static const struct irq_domain_ops gicp_domain_ops = { static const struct msi_parent_ops gicp_msi_parent_ops = { .supported_flags = GICP_MSI_FLAGS_SUPPORTED, .required_flags = GICP_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + 
.chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "GICP-", diff --git a/drivers/irqchip/irq-mvebu-odmi.c b/drivers/irqchip/irq-mvebu-odmi.c index 28f7e81df94f0a..54f6f081157339 100644 --- a/drivers/irqchip/irq-mvebu-odmi.c +++ b/drivers/irqchip/irq-mvebu-odmi.c @@ -157,7 +157,7 @@ static const struct irq_domain_ops odmi_domain_ops = { static const struct msi_parent_ops odmi_msi_parent_ops = { .supported_flags = ODMI_MSI_FLAGS_SUPPORTED, .required_flags = ODMI_MSI_FLAGS_REQUIRED, - .chip_flags = MSI_CHIP_FLAG_SET_EOI | MSI_CHIP_FLAG_SET_ACK, + .chip_flags = MSI_CHIP_FLAG_SET_EOI, .bus_select_token = DOMAIN_BUS_GENERIC_MSI, .bus_select_mask = MATCH_PLATFORM_MSI, .prefix = "ODMI-", diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index 7942d8eb3d00ea..f772deb9cba574 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -227,6 +227,9 @@ static int qcom_mpm_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + if (pin == GPIO_NO_WAKE_IRQ) + return irq_domain_disconnect_hierarchy(domain, virq); + ret = irq_domain_set_hwirq_and_chip(domain, virq, pin, &qcom_mpm_chip, priv); if (ret) diff --git a/drivers/irqchip/irq-renesas-rzv2h.c b/drivers/irqchip/irq-renesas-rzv2h.c index 3d5b5fdf9bde82..0f0fd7d4dfdf2c 100644 --- a/drivers/irqchip/irq-renesas-rzv2h.c +++ b/drivers/irqchip/irq-renesas-rzv2h.c @@ -170,6 +170,14 @@ static void rzv2h_tint_irq_endisable(struct irq_data *d, bool enable) else tssr &= ~ICU_TSSR_TIEN(tssel_n, priv->info->field_width); writel_relaxed(tssr, priv->base + priv->info->t_offs + ICU_TSSR(k)); + + /* + * A glitch in the edge detection circuit can cause a spurious + * interrupt. Clear the status flag after setting the ICU_TSSRk + * registers, which is recommended by the hardware manual as a + * countermeasure. 
+ */ + writel_relaxed(BIT(tint_nr), priv->base + priv->info->t_offs + ICU_TSCLR); } static void rzv2h_icu_irq_disable(struct irq_data *d) diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index bdf5cd2037f289..62f76950a113b5 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -208,17 +208,17 @@ static bool __imsic_local_sync(struct imsic_local_priv *lpriv) } #ifdef CONFIG_SMP -static void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { lockdep_assert_held(&lpriv->lock); if (!timer_pending(&lpriv->timer)) { lpriv->timer.expires = jiffies + 1; - add_timer_on(&lpriv->timer, smp_processor_id()); + add_timer_on(&lpriv->timer, cpu); } } #else -static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv) +static inline void __imsic_local_timer_start(struct imsic_local_priv *lpriv, unsigned int cpu) { } #endif @@ -233,7 +233,7 @@ void imsic_local_sync_all(bool force_all) if (force_all) bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1); if (!__imsic_local_sync(lpriv)) - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, smp_processor_id()); raw_spin_unlock_irqrestore(&lpriv->lock, flags); } @@ -278,7 +278,7 @@ static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu return; } - __imsic_local_timer_start(lpriv); + __imsic_local_timer_start(lpriv, cpu); } } #else diff --git a/drivers/irqchip/irq-sg2042-msi.c b/drivers/irqchip/irq-sg2042-msi.c index ee682e87eb8be2..375b55aa0acd8d 100644 --- a/drivers/irqchip/irq-sg2042-msi.c +++ b/drivers/irqchip/irq-sg2042-msi.c @@ -151,6 +151,7 @@ static const struct irq_domain_ops sg2042_msi_middle_domain_ops = { static const struct msi_parent_ops sg2042_msi_parent_ops = { .required_flags = SG2042_MSI_FLAGS_REQUIRED, .supported_flags = SG2042_MSI_FLAGS_SUPPORTED, + .chip_flags = 
MSI_CHIP_FLAG_SET_ACK, .bus_select_mask = MATCH_PCI_MSI, .bus_select_token = DOMAIN_BUS_NEXUS, .prefix = "SG2042-", diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index ed52db272f4d05..e8445cda7c6182 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -191,8 +191,8 @@ config POLARFIRE_SOC_MAILBOX config MCHP_SBI_IPC_MBOX tristate "Microchip Inter-processor Communication (IPC) SBI driver" - depends on RISCV_SBI || COMPILE_TEST - depends on ARCH_MICROCHIP + depends on RISCV_SBI + depends on ARCH_MICROCHIP || COMPILE_TEST help Mailbox implementation for Microchip devices with an Inter-process communication (IPC) controller. diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c index 6ef8338add0d61..6778afc64a048c 100644 --- a/drivers/mailbox/imx-mailbox.c +++ b/drivers/mailbox/imx-mailbox.c @@ -226,7 +226,7 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv, { u32 *arg = data; u32 val; - int ret; + int ret, count; switch (cp->type) { case IMX_MU_TYPE_TX: @@ -240,11 +240,20 @@ static int imx_mu_generic_tx(struct imx_mu_priv *priv, case IMX_MU_TYPE_TXDB_V2: imx_mu_write(priv, IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx), priv->dcfg->xCR[IMX_MU_GCR]); - ret = readl_poll_timeout(priv->base + priv->dcfg->xCR[IMX_MU_GCR], val, - !(val & IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx)), - 0, 1000); - if (ret) - dev_warn_ratelimited(priv->dev, "channel type: %d failure\n", cp->type); + ret = -ETIMEDOUT; + count = 0; + while (ret && (count < 10)) { + ret = + readl_poll_timeout(priv->base + priv->dcfg->xCR[IMX_MU_GCR], val, + !(val & IMX_MU_xCR_GIRn(priv->dcfg->type, cp->idx)), + 0, 10000); + + if (ret) { + dev_warn_ratelimited(priv->dev, + "channel type: %d timeout, %d times, retry\n", + cp->type, ++count); + } + } break; default: dev_warn_ratelimited(priv->dev, "Send data on wrong channel type: %d\n", cp->type); diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c index 
d186865b8dce64..ab4e8d1954a16e 100644 --- a/drivers/mailbox/mtk-cmdq-mailbox.c +++ b/drivers/mailbox/mtk-cmdq-mailbox.c @@ -92,18 +92,6 @@ struct gce_plat { u32 gce_num; }; -static void cmdq_sw_ddr_enable(struct cmdq *cmdq, bool enable) -{ - WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); - - if (enable) - writel(GCE_DDR_EN | GCE_CTRL_BY_SW, cmdq->base + GCE_GCTL_VALUE); - else - writel(GCE_CTRL_BY_SW, cmdq->base + GCE_GCTL_VALUE); - - clk_bulk_disable(cmdq->pdata->gce_num, cmdq->clocks); -} - u8 cmdq_get_shift_pa(struct mbox_chan *chan) { struct cmdq *cmdq = container_of(chan->mbox, struct cmdq, mbox); @@ -112,6 +100,19 @@ u8 cmdq_get_shift_pa(struct mbox_chan *chan) } EXPORT_SYMBOL(cmdq_get_shift_pa); +static void cmdq_gctl_value_toggle(struct cmdq *cmdq, bool ddr_enable) +{ + u32 val = cmdq->pdata->control_by_sw ? GCE_CTRL_BY_SW : 0; + + if (!cmdq->pdata->control_by_sw && !cmdq->pdata->sw_ddr_en) + return; + + if (cmdq->pdata->sw_ddr_en && ddr_enable) + val |= GCE_DDR_EN; + + writel(val, cmdq->base + GCE_GCTL_VALUE); +} + static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread) { u32 status; @@ -140,16 +141,10 @@ static void cmdq_thread_resume(struct cmdq_thread *thread) static void cmdq_init(struct cmdq *cmdq) { int i; - u32 gctl_regval = 0; WARN_ON(clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks)); - if (cmdq->pdata->control_by_sw) - gctl_regval = GCE_CTRL_BY_SW; - if (cmdq->pdata->sw_ddr_en) - gctl_regval |= GCE_DDR_EN; - if (gctl_regval) - writel(gctl_regval, cmdq->base + GCE_GCTL_VALUE); + cmdq_gctl_value_toggle(cmdq, true); writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES); for (i = 0; i <= CMDQ_MAX_EVENT; i++) @@ -315,14 +310,21 @@ static irqreturn_t cmdq_irq_handler(int irq, void *dev) static int cmdq_runtime_resume(struct device *dev) { struct cmdq *cmdq = dev_get_drvdata(dev); + int ret; - return clk_bulk_enable(cmdq->pdata->gce_num, cmdq->clocks); + ret = clk_bulk_enable(cmdq->pdata->gce_num, 
cmdq->clocks); + if (ret) + return ret; + + cmdq_gctl_value_toggle(cmdq, true); + return 0; } static int cmdq_runtime_suspend(struct device *dev) { struct cmdq *cmdq = dev_get_drvdata(dev); + cmdq_gctl_value_toggle(cmdq, false); clk_bulk_disable(cmdq->pdata->gce_num, cmdq->clocks); return 0; } @@ -347,9 +349,6 @@ static int cmdq_suspend(struct device *dev) if (task_running) dev_warn(dev, "exist running task(s) in suspend\n"); - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, false); - return pm_runtime_force_suspend(dev); } @@ -360,9 +359,6 @@ static int cmdq_resume(struct device *dev) WARN_ON(pm_runtime_force_resume(dev)); cmdq->suspended = false; - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, true); - return 0; } @@ -370,9 +366,6 @@ static void cmdq_remove(struct platform_device *pdev) { struct cmdq *cmdq = platform_get_drvdata(pdev); - if (cmdq->pdata->sw_ddr_en) - cmdq_sw_ddr_enable(cmdq, false); - if (!IS_ENABLED(CONFIG_PM)) cmdq_runtime_suspend(&pdev->dev); diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 02a680c73979b9..bf0d7d58c8b014 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -96,7 +96,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, ret = mcb_device_register(bus, mdev); if (ret < 0) - goto err; + return ret; return 0; diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 06f809e70f1535..ddb37f6670de88 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -139,7 +139,7 @@ config MD_RAID456 tristate "RAID-4/RAID-5/RAID-6 mode" depends on BLK_DEV_MD select RAID6_PQ - select LIBCRC32C + select CRC32 select ASYNC_MEMCPY select ASYNC_XOR select ASYNC_PQ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e42f1400cea9d7..813b38aec3e4e0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -546,7 +546,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) 
{ - static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + static const char zero_uuid[16] = { 0 }; return uuid_find(c, zero_uuid); } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c8ed65cd87e63..f0b5a6931161a0 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -68,6 +68,8 @@ #define LIST_DIRTY 1 #define LIST_SIZE 2 +#define SCAN_RESCHED_CYCLE 16 + /*--------------------------------------------------------------*/ /* @@ -2424,7 +2426,12 @@ static void __scan(struct dm_bufio_client *c) atomic_long_dec(&c->need_shrink); freed++; - cond_resched(); + + if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) { + dm_bufio_unlock(c); + cond_resched(); + dm_bufio_lock(c); + } } } } diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 3637761f35853c..f3a3f2ef632261 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -141,6 +141,7 @@ struct mapped_device { #ifdef CONFIG_BLK_DEV_ZONED unsigned int nr_zones; void *zone_revalidate_map; + struct task_struct *revalidate_map_task; #endif #ifdef CONFIG_IMA diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9dfdb63220d746..91aeee72a15e92 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3284,6 +3284,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } +#ifdef CONFIG_CACHY + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); +#endif + ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b690905ab89ffb..347881f323d5bc 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -47,14 +47,15 @@ enum feature_flag_bits { }; struct per_bio_data { - bool bio_submitted; + bool bio_can_corrupt; + struct bvec_iter saved_iter; }; static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { - int r; - unsigned int argc; + int r = 0; + 
unsigned int argc = 0; const char *arg_name; static const struct dm_arg _args[] = { @@ -65,14 +66,13 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, {0, PROBABILITY_BASE, "Invalid random corrupt argument"}, }; - /* No feature arguments supplied. */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) + if (as->argc && (r = dm_read_arg_group(_args, as, &argc, &ti->error))) return r; + /* No feature arguments supplied. */ + if (!argc) + goto error_all_io; + while (argc) { arg_name = dm_shift_arg(as); argc--; @@ -217,6 +217,7 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, if (!fc->corrupt_bio_byte && !test_bit(ERROR_READS, &fc->flags) && !test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags) && !fc->random_read_corrupt && !fc->random_write_corrupt) { +error_all_io: set_bit(ERROR_WRITES, &fc->flags); set_bit(ERROR_READS, &fc->flags); } @@ -339,7 +340,8 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) } static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, - unsigned char corrupt_bio_value) + unsigned char corrupt_bio_value, + struct bvec_iter start) { struct bvec_iter iter; struct bio_vec bvec; @@ -348,7 +350,7 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, * Overwrite the Nth byte of the bio's data, on whichever page * it falls. */ - bio_for_each_segment(bvec, bio, iter) { + __bio_for_each_segment(bvec, bio, iter, start) { if (bio_iter_len(bio, iter) > corrupt_bio_byte) { unsigned char *segment = bvec_kmap_local(&bvec); segment[corrupt_bio_byte] = corrupt_bio_value; @@ -357,36 +359,31 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", bio, corrupt_bio_value, corrupt_bio_byte, (bio_data_dir(bio) == WRITE) ? 
'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, - bio->bi_iter.bi_size); + (unsigned long long)start.bi_sector, + start.bi_size); break; } corrupt_bio_byte -= bio_iter_len(bio, iter); } } -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc, + struct bvec_iter start) { unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; - if (!bio_has_data(bio)) - return; - - corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value); + corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value, start); } -static void corrupt_bio_random(struct bio *bio) +static void corrupt_bio_random(struct bio *bio, struct bvec_iter start) { unsigned int corrupt_byte; unsigned char corrupt_value; - if (!bio_has_data(bio)) - return; - - corrupt_byte = get_random_u32() % bio->bi_iter.bi_size; + corrupt_byte = get_random_u32() % start.bi_size; corrupt_value = get_random_u8(); - corrupt_bio_common(bio, corrupt_byte, corrupt_value); + corrupt_bio_common(bio, corrupt_byte, corrupt_value, start); } static void clone_free(struct bio *clone) @@ -481,7 +478,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) unsigned int elapsed; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - pb->bio_submitted = false; + pb->bio_can_corrupt = false; if (op_is_zone_mgmt(bio_op(bio))) goto map_bio; @@ -490,10 +487,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { bool corrupt_fixed, corrupt_random; - /* - * Flag this bio as submitted while down. - */ - pb->bio_submitted = true; + + if (bio_has_data(bio)) { + pb->bio_can_corrupt = true; + pb->saved_iter = bio->bi_iter; + } /* * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set. 
@@ -516,6 +514,8 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } + if (!pb->bio_can_corrupt) + goto map_bio; /* * Corrupt matching writes. */ @@ -535,9 +535,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) struct bio *clone = clone_bio(ti, fc, bio); if (clone) { if (corrupt_fixed) - corrupt_bio_data(clone, fc); + corrupt_bio_data(clone, fc, + clone->bi_iter); if (corrupt_random) - corrupt_bio_random(clone); + corrupt_bio_random(clone, + clone->bi_iter); submit_bio(clone); return DM_MAPIO_SUBMITTED; } @@ -559,21 +561,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, if (op_is_zone_mgmt(bio_op(bio))) return DM_ENDIO_DONE; - if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { + if (!*error && pb->bio_can_corrupt && (bio_data_dir(bio) == READ)) { if (fc->corrupt_bio_byte) { if ((fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) { /* * Corrupt successful matching READs while in down state. 
*/ - corrupt_bio_data(bio, fc); + corrupt_bio_data(bio, fc, pb->saved_iter); } } if (fc->random_read_corrupt) { u64 rnd = get_random_u64(); u32 rem = do_div(rnd, PROBABILITY_BASE); if (rem < fc->random_read_corrupt) - corrupt_bio_random(bio); + corrupt_bio_random(bio, pb->saved_iter); } if (test_bit(ERROR_READS, &fc->flags)) { /* diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2a283feb3319ca..cc3d3897ef428f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -5164,7 +5164,7 @@ static void dm_integrity_dtr(struct dm_target *ti) BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); BUG_ON(!list_empty(&ic->wait_list)); - if (ic->mode == 'B') + if (ic->mode == 'B' && ic->bitmap_flush_work.work.func) cancel_delayed_work_sync(&ic->bitmap_flush_work); if (ic->metadata_wq) destroy_workqueue(ic->metadata_wq); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 35100a435c88ba..e009bba52d4c0c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -523,8 +523,9 @@ static char **realloc_argv(unsigned int *size, char **old_argv) gfp = GFP_NOIO; } argv = kmalloc_array(new_size, sizeof(*argv), gfp); - if (argv && old_argv) { - memcpy(argv, old_argv, *size * sizeof(*argv)); + if (argv) { + if (old_argv) + memcpy(argv, old_argv, *size * sizeof(*argv)); *size = new_size; } @@ -1049,7 +1050,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * unsigned int min_pool_size = 0, pool_size; struct dm_md_mempools *pools; unsigned int bioset_flags = 0; - bool mempool_needs_integrity = t->integrity_supported; if (unlikely(type == DM_TYPE_NONE)) { DMERR("no table type is set, can't allocate mempools"); @@ -1074,8 +1074,6 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * per_io_data_size = max(per_io_data_size, ti->per_io_data_size); min_pool_size = max(min_pool_size, ti->num_flush_bios); - - mempool_needs_integrity |= ti->mempool_needs_integrity; } pool_size = 
max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, @@ -1175,7 +1173,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, t = dm_get_live_table(md, &srcu_idx); if (!t) - return 0; + goto put_live_table; for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1186,6 +1184,7 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile, (void *)key); } +put_live_table: dm_put_live_table(md, srcu_idx); return 0; } @@ -1491,6 +1490,18 @@ bool dm_table_has_no_data_devices(struct dm_table *t) return true; } +bool dm_table_is_wildcard(struct dm_table *t) +{ + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (!dm_target_is_wildcard(ti->type)) + return false; + } + + return true; +} + static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1831,10 +1842,24 @@ static bool dm_table_supports_atomic_writes(struct dm_table *t) return true; } +bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, + sector_t new_size) +{ + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) && + old_size != new_size) { + DMWARN("%s: device has zone write plug resources. 
" + "Cannot change size", + dm_device_name(t->md)); + return false; + } + return true; +} + int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { int r; + struct queue_limits old_limits; if (!dm_table_supports_nowait(t)) limits->features &= ~BLK_FEAT_NOWAIT; @@ -1861,28 +1886,30 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_flush(t)) limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; - if (dm_table_supports_dax(t, device_not_dax_capable)) { + if (dm_table_supports_dax(t, device_not_dax_capable)) limits->features |= BLK_FEAT_DAX; - if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) - set_dax_synchronous(t->md->dax_dev); - } else + else limits->features &= ~BLK_FEAT_DAX; - if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) - dax_write_cache(t->md->dax_dev, true); - /* For a zoned table, setup the zone related queue attributes. */ - if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - (limits->features & BLK_FEAT_ZONED)) { - r = dm_set_zones_restrictions(t, q, limits); - if (r) - return r; + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { + if (limits->features & BLK_FEAT_ZONED) { + r = dm_set_zones_restrictions(t, q, limits); + if (r) + return r; + } else if (dm_has_zone_plugs(t->md)) { + DMWARN("%s: device has zone write plug resources. 
" + "Cannot switch to non-zoned table.", + dm_device_name(t->md)); + return -EINVAL; + } } if (dm_table_supports_atomic_writes(t)) limits->features |= BLK_FEAT_ATOMIC_WRITES; - r = queue_limits_set(q, limits); + old_limits = queue_limits_start_update(q); + r = queue_limits_commit_update(q, limits); if (r) return r; @@ -1893,10 +1920,21 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && (limits->features & BLK_FEAT_ZONED)) { r = dm_revalidate_zones(t, q); - if (r) + if (r) { + queue_limits_set(q, &old_limits); return r; + } } + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + dm_finalize_zone_settings(t, limits); + + if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) + set_dax_synchronous(t->md->dax_dev); + + if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) + dax_write_cache(t->md->dax_dev, true); + dm_update_crypto_profile(q, t); return 0; } diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 20edd3fabbabfe..4af78111d0b4dd 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -56,24 +56,31 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector, { struct mapped_device *md = disk->private_data; struct dm_table *map; - int srcu_idx, ret; + struct dm_table *zone_revalidate_map = md->zone_revalidate_map; + int srcu_idx, ret = -EIO; + bool put_table = false; - if (!md->zone_revalidate_map) { - /* Regular user context */ + if (!zone_revalidate_map || md->revalidate_map_task != current) { + /* + * Regular user context or + * Zone revalidation during __bind() is in progress, but this + * call is from a different process + */ if (dm_suspended_md(md)) return -EAGAIN; map = dm_get_live_table(md, &srcu_idx); - if (!map) - return -EIO; + put_table = true; } else { /* Zone revalidation during __bind() */ - map = md->zone_revalidate_map; + map = zone_revalidate_map; } - ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data); + if (map) + ret = 
dm_blk_do_report_zones(md, map, sector, nr_zones, cb, + data); - if (!md->zone_revalidate_map) + if (put_table) dm_put_live_table(md, srcu_idx); return ret; @@ -153,33 +160,36 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q) { struct mapped_device *md = t->md; struct gendisk *disk = md->disk; + unsigned int nr_zones = disk->nr_zones; int ret; if (!get_capacity(disk)) return 0; - /* Revalidate only if something changed. */ - if (!disk->nr_zones || disk->nr_zones != md->nr_zones) { - DMINFO("%s using %s zone append", - disk->disk_name, - queue_emulates_zone_append(q) ? "emulated" : "native"); - md->nr_zones = 0; - } - - if (md->nr_zones) + /* + * Do not revalidate if zone write plug resources have already + * been allocated. + */ + if (dm_has_zone_plugs(md)) return 0; + DMINFO("%s using %s zone append", disk->disk_name, + queue_emulates_zone_append(q) ? "emulated" : "native"); + /* * Our table is not live yet. So the call to dm_get_live_table() * in dm_blk_report_zones() will fail. Set a temporary pointer to * our table for dm_blk_report_zones() to use directly. */ md->zone_revalidate_map = t; + md->revalidate_map_task = current; ret = blk_revalidate_disk_zones(disk); + md->revalidate_map_task = NULL; md->zone_revalidate_map = NULL; if (ret) { DMERR("Revalidate zones failed %d", ret); + disk->nr_zones = nr_zones; return ret; } @@ -340,12 +350,8 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, * mapped device queue as needing zone append emulation. 
*/ WARN_ON_ONCE(queue_is_mq(q)); - if (dm_table_supports_zone_append(t)) { - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - } else { - set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + if (!dm_table_supports_zone_append(t)) lim->max_hw_zone_append_sectors = 0; - } /* * Determine the max open and max active zone limits for the mapped @@ -380,15 +386,28 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, lim->max_open_zones = 0; lim->max_active_zones = 0; lim->max_hw_zone_append_sectors = 0; + lim->max_zone_append_sectors = 0; lim->zone_write_granularity = 0; lim->chunk_sectors = 0; lim->features &= ~BLK_FEAT_ZONED; - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - md->nr_zones = 0; - disk->nr_zones = 0; return 0; } + if (get_capacity(disk) && dm_has_zone_plugs(t->md)) { + if (q->limits.chunk_sectors != lim->chunk_sectors) { + DMWARN("%s: device has zone write plug resources. " + "Cannot change zone size", + disk->disk_name); + return -EINVAL; + } + if (lim->max_hw_zone_append_sectors != 0 && + !dm_table_is_wildcard(t)) { + DMWARN("%s: device has zone write plug resources. " + "New table must emulate zone append", + disk->disk_name); + return -EINVAL; + } + } /* * Warn once (when the capacity is not yet set) if the mapped device is * partially using zone resources of the target devices as that leads to @@ -408,6 +427,23 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, return 0; } +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim) +{ + struct mapped_device *md = t->md; + + if (lim->features & BLK_FEAT_ZONED) { + if (dm_table_supports_zone_append(t)) + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + else + set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + } else { + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); + md->nr_zones = 0; + md->disk->nr_zones = 0; + } +} + + /* * IO completion callback called from clone_endio(). 
*/ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5ab7574c0c76ab..240f6dab8ddafb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2421,21 +2421,35 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, struct queue_limits *limits) { struct dm_table *old_map; - sector_t size; + sector_t size, old_size; int ret; lockdep_assert_held(&md->suspend_lock); size = dm_table_get_size(t); + old_size = dm_get_size(md); + + if (!dm_table_supports_size_change(t, old_size, size)) { + old_map = ERR_PTR(-EINVAL); + goto out; + } + + set_capacity(md->disk, size); + + ret = dm_table_set_restrictions(t, md->queue, limits); + if (ret) { + set_capacity(md->disk, old_size); + old_map = ERR_PTR(ret); + goto out; + } + /* * Wipe any geometry if the size of the table changed. */ - if (size != dm_get_size(md)) + if (size != old_size) memset(&md->geometry, 0, sizeof(md->geometry)); - set_capacity(md->disk, size); - dm_table_event_callback(t, event_callback, md); if (dm_table_request_based(t)) { @@ -2453,10 +2467,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, * requests in the queue may refer to bio from the old bioset, * so you must walk through the queue to unprep. */ - if (!md->mempools) { + if (!md->mempools) md->mempools = t->mempools; - t->mempools = NULL; - } + else + dm_free_md_mempools(t->mempools); } else { /* * The md may already have mempools that need changing. 
@@ -2465,14 +2479,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, */ dm_free_md_mempools(md->mempools); md->mempools = t->mempools; - t->mempools = NULL; - } - - ret = dm_table_set_restrictions(t, md->queue, limits); - if (ret) { - old_map = ERR_PTR(ret); - goto out; } + t->mempools = NULL; old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); rcu_assign_pointer(md->map, (void *)t); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index a0a8ff11981580..245f52b592154d 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -58,6 +58,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); bool dm_table_has_no_data_devices(struct dm_table *table); +bool dm_table_is_wildcard(struct dm_table *t); int dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, @@ -72,6 +73,8 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); struct dm_target *dm_table_get_immutable_target(struct dm_table *t); struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); +bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size, + sector_t new_size); void dm_lock_md_type(struct mapped_device *md); void dm_unlock_md_type(struct mapped_device *md); @@ -102,6 +105,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *lim); int dm_revalidate_zones(struct dm_table *t, struct request_queue *q); +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim); void dm_zone_endio(struct dm_io *io, struct bio *clone); #ifdef CONFIG_BLK_DEV_ZONED int dm_blk_report_zones(struct gendisk *disk, sector_t sector, @@ 
-110,12 +114,14 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio); int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t, sector_t sector, unsigned int nr_zones, unsigned long *need_reset); +#define dm_has_zone_plugs(md) ((md)->disk->zone_wplugs_hash != NULL) #else #define dm_blk_report_zones NULL static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) { return false; } +#define dm_has_zone_plugs(md) false #endif /* diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 44ec9b17cfd336..37b08f26c62f5d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2357,9 +2357,8 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) if (!bitmap) return -ENOENT; - if (bitmap->mddev->bitmap_info.external) - return -ENOENT; - if (!bitmap->storage.sb_page) /* no superblock */ + if (!bitmap->mddev->bitmap_info.external && + !bitmap->storage.sb_page) return -EINVAL; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index f4f948b0e17310..dbb97a7233ab95 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -2,7 +2,7 @@ config DM_PERSISTENT_DATA tristate depends on BLK_DEV_DM - select LIBCRC32C + select CRC32 select DM_BUFIO help Library providing immutable on-disk data structure support for diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index c7efd8aab675cc..b8b3a90697012c 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -293,3 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev, return false; } + +/* + * bio with REQ_RAHEAD or REQ_NOWAIT can fail at any time, before such IO is + * submitted to the underlying disks, hence don't record badblocks or retry + * in this case.
+ */ +static inline bool raid1_should_handle_error(struct bio *bio) +{ + return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT)); +} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0efc03cea24efe..1fe645e6300121 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -373,14 +373,16 @@ static void raid1_end_read_request(struct bio *bio) */ update_head_pos(r1_bio->read_disk, r1_bio); - if (uptodate) + if (uptodate) { set_bit(R1BIO_Uptodate, &r1_bio->state); - else if (test_bit(FailFast, &rdev->flags) && - test_bit(R1BIO_FailFast, &r1_bio->state)) + } else if (test_bit(FailFast, &rdev->flags) && + test_bit(R1BIO_FailFast, &r1_bio->state)) { /* This was a fail-fast read so we definitely * want to retry */ ; - else { + } else if (!raid1_should_handle_error(bio)) { + uptodate = 1; + } else { /* If all other devices have failed, we want to return * the error upwards rather than fail the last device. * Here we redefine "uptodate" to mean "Don't want to retry" @@ -451,16 +453,15 @@ static void raid1_end_write_request(struct bio *bio) struct bio *to_put = NULL; int mirror = find_bio_disk(r1_bio, bio); struct md_rdev *rdev = conf->mirrors[mirror].rdev; - bool discard_error; sector_t lo = r1_bio->sector; sector_t hi = r1_bio->sector + r1_bio->sectors; - - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; + bool ignore_error = !raid1_should_handle_error(bio) || + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD); /* * 'one mirror IO has finished' event handler: */ - if (bio->bi_status && !discard_error) { + if (bio->bi_status && !ignore_error) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -511,7 +512,7 @@ static void raid1_end_write_request(struct bio *bio) /* Maybe we can clear some bad blocks. 
*/ if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) && - !discard_error) { + !ignore_error) { r1_bio->bios[mirror] = IO_MADE_GOOD; set_bit(R1BIO_MadeGood, &r1_bio->state); } @@ -2200,14 +2201,9 @@ static int fix_sync_read_error(struct r1bio *r1_bio) if (!rdev_set_badblocks(rdev, sect, s, 0)) abort = 1; } - if (abort) { - conf->recovery_disabled = - mddev->recovery_disabled; - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); + if (abort) return 0; - } + /* Try next page */ sectors -= s; sect += s; @@ -2346,10 +2342,21 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) int disks = conf->raid_disks * 2; struct bio *wbio; - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - /* ouch - failed to read all of that. */ - if (!fix_sync_read_error(r1_bio)) + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) { + /* + * ouch - failed to read all of that. + * No need to fix read error for check/repair + * because all member disks are read. + */ + if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) || + !fix_sync_read_error(r1_bio)) { + conf->recovery_disabled = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_done_sync(mddev, r1_bio->sectors, 0); + put_buf(r1_bio); return; + } + } if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) process_checks(r1_bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 846c5f29486e3f..54320a887ecc50 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -399,6 +399,8 @@ static void raid10_end_read_request(struct bio *bio) * wait for the 'master' bio. 
*/ set_bit(R10BIO_Uptodate, &r10_bio->state); + } else if (!raid1_should_handle_error(bio)) { + uptodate = 1; } else { /* If all other devices that store this block have * failed, we want to return the error upwards rather @@ -456,9 +458,8 @@ static void raid10_end_write_request(struct bio *bio) int slot, repl; struct md_rdev *rdev = NULL; struct bio *to_put = NULL; - bool discard_error; - - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD; + bool ignore_error = !raid1_should_handle_error(bio) || + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD); dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); @@ -472,7 +473,7 @@ static void raid10_end_write_request(struct bio *bio) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (bio->bi_status && !discard_error) { + if (bio->bi_status && !ignore_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. @@ -527,7 +528,7 @@ static void raid10_end_write_request(struct bio *bio) /* Maybe we can clear some bad blocks. 
*/ if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr, r10_bio->sectors) && - !discard_error) { + !ignore_error) { bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; @@ -1735,6 +1736,7 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) * The discard bio returns only first r10bio finishes */ if (first_copy) { + md_account_bio(mddev, &bio); r10_bio->master_bio = bio; set_bit(R10BIO_Discard, &r10_bio->state); first_copy = false; diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig index b9d21643eef189..c31abc26f60204 100644 --- a/drivers/media/cec/i2c/Kconfig +++ b/drivers/media/cec/i2c/Kconfig @@ -16,6 +16,7 @@ config CEC_CH7322 config CEC_NXP_TDA9950 tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC" + depends on I2C select CEC_NOTIFIER select CEC_CORE default DRM_I2C_NXP_TDA998X diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index e576b213084d23..e45ba127069fc0 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1149,8 +1149,11 @@ config VIDEO_ISL7998X config VIDEO_LT6911UXE tristate "Lontium LT6911UXE decoder" - depends on ACPI && VIDEO_DEV + depends on ACPI && VIDEO_DEV && I2C select V4L2_FWNODE + select V4L2_CCI_I2C + select MEDIA_CONTROLLER + select VIDEO_V4L2_SUBDEV_API help This is a Video4Linux2 sensor-level driver for the Lontium LT6911UXE HDMI to MIPI CSI-2 bridge. 
diff --git a/drivers/media/platform/synopsys/hdmirx/Kconfig b/drivers/media/platform/synopsys/hdmirx/Kconfig index 27e6706f84a373..4321f985f63206 100644 --- a/drivers/media/platform/synopsys/hdmirx/Kconfig +++ b/drivers/media/platform/synopsys/hdmirx/Kconfig @@ -2,6 +2,7 @@ config VIDEO_SYNOPSYS_HDMIRX tristate "Synopsys DesignWare HDMI Receiver driver" + depends on ARCH_ROCKCHIP || COMPILE_TEST depends on VIDEO_DEV select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c index 3d2913de9a86c6..7af6765532e332 100644 --- a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c +++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c @@ -114,7 +114,7 @@ struct hdmirx_stream { spinlock_t vbq_lock; /* to lock video buffer queue */ bool stopping; wait_queue_head_t wq_stopped; - u32 frame_idx; + u32 sequence; u32 line_flag_int_cnt; u32 irq_stat; }; @@ -1540,7 +1540,7 @@ static int hdmirx_start_streaming(struct vb2_queue *queue, unsigned int count) int line_flag; mutex_lock(&hdmirx_dev->stream_lock); - stream->frame_idx = 0; + stream->sequence = 0; stream->line_flag_int_cnt = 0; stream->curr_buf = NULL; stream->next_buf = NULL; @@ -1948,7 +1948,7 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, if (vb_done) { vb_done->vb2_buf.timestamp = ktime_get_ns(); - vb_done->sequence = stream->frame_idx; + vb_done->sequence = stream->sequence; if (bt->interlaced) vb_done->field = V4L2_FIELD_INTERLACED_TB; @@ -1956,10 +1956,6 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, vb_done->field = V4L2_FIELD_NONE; hdmirx_vb_done(stream, vb_done); - stream->frame_idx++; - if (stream->frame_idx == 30) - v4l2_dbg(1, debug, v4l2_dev, - "rcv frames\n"); } stream->curr_buf = NULL; @@ -1971,6 +1967,10 @@ static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev, v4l2_dbg(3, debug, v4l2_dev, "%s: next_buf NULL, skip vb_done\n", 
__func__); } + + stream->sequence++; + if (stream->sequence == 30) + v4l2_dbg(1, debug, v4l2_dev, "rcv frames\n"); } DMA_IDLE_OUT: diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c index c435a393e0cb70..9f559a13d409bb 100644 --- a/drivers/media/platform/verisilicon/hantro_postproc.c +++ b/drivers/media/platform/verisilicon/hantro_postproc.c @@ -250,8 +250,10 @@ int hantro_postproc_init(struct hantro_ctx *ctx) for (i = 0; i < num_buffers; i++) { ret = hantro_postproc_alloc(ctx, i); - if (ret) + if (ret) { + hantro_postproc_free(ctx); return ret; + } } return 0; diff --git a/drivers/media/test-drivers/vivid/Kconfig b/drivers/media/test-drivers/vivid/Kconfig index e95edc0f22bfb9..cc470070a7a5ea 100644 --- a/drivers/media/test-drivers/vivid/Kconfig +++ b/drivers/media/test-drivers/vivid/Kconfig @@ -32,7 +32,8 @@ config VIDEO_VIVID_CEC config VIDEO_VIVID_OSD bool "Enable Framebuffer for testing Output Overlay" - depends on VIDEO_VIVID && FB + depends on VIDEO_VIVID && FB_CORE + depends on VIDEO_VIVID=m || FB_CORE=y default y select FB_IOMEM_HELPERS help diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index 331b8e535e5bbf..80dabeebf58048 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -40,6 +40,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 2177b9d63a8ffc..c179507cedc490 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -33,5 +33,7 @@ obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o obj-$(CONFIG_V4L2_MEM2MEM_DEV) += v4l2-mem2mem.o obj-$(CONFIG_V4L2_VP9) += v4l2-vp9.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEO_TUNER) += tuner.o 
obj-$(CONFIG_VIDEO_DEV) += v4l2-dv-timings.o videodev.o diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 00000000000000..74bd8125caa32c --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,3313 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2023 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "v4l2loopback.h" + +#define V4L2LOOPBACK_CTL_ADD_legacy 0x4C80 +#define V4L2LOOPBACK_CTL_REMOVE_legacy 0x4C81 +#define V4L2LOOPBACK_CTL_QUERY_legacy 0x4C82 + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) +#error This module is not supported on kernels before 4.0.0. 
+#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) +#define strscpy strlcpy +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 2, 0) +#define timer_delete_sync del_timer_sync +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +#ifdef SNAPSHOT_VERSION +MODULE_VERSION(__stringify(SNAPSHOT_VERSION)); +#else +MODULE_VERSION("" __stringify(V4L2LOOPBACK_VERSION_MAJOR) "." __stringify( + V4L2LOOPBACK_VERSION_MINOR) "." __stringify(V4L2LOOPBACK_VERSION_BUGFIX)); +#endif +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s], pid(%d)\n", __FILE__, \ + __LINE__, __func__, task_pid_nr(current)); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) 
\ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" __stringify( \ + __LINE__) "], pid(%d): " fmt, \ + task_pid_nr(current), ##args); \ + } \ + } while (0) + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + struct timespec64 ts; + ktime_get_ts64(&ts); + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); + b->flags |= V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + b->flags &= ~V4L2_BUF_FLAG_TIMESTAMP_COPY; +} + +#if BITS_PER_LONG == 32 +#include /* do_div() for 64bit division */ +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + u64 a = (u64)A; + u32 b = B; + + if (A > 0) + return do_div(a, b); + a = -A; + return -do_div(a, b); +} +#else +static inline int v4l2l_mod64(const s64 A, const u32 B) +{ + return A % B; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during he build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped, actually they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be 
allocated [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " __stringify( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 2 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 1 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); 
+MODULE_PARM_DESC(max_width, + "maximum allowed frame width [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " __stringify( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* frame intervals */ +#define V4L2LOOPBACK_FRAME_INTERVAL_MAX __UINT32_MAX__ +#define V4L2LOOPBACK_FPS_DEFAULT 30 +#define V4L2LOOPBACK_FPS_MAX 1000 + +/* control IDs */ +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config 
v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BUTTON, + .min = 0, + .max = 0, + .step = 0, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + atomic_t use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + + /* loopback device-specific parameters */ + char card_label[32]; + bool announce_all_caps; /* announce both OUTPUT and CAPTURE capabilities + * when true; else announce OUTPUT when no + * writer is streaming, otherwise CAPTURE. */ + int max_openers; /* how many times can this device be opened */ + int min_width, max_width; + int min_height, max_height; + + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + bool pix_format_has_valid_sizeimage; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; lock the format, do not free + * on close(), and when `!announce_all_caps` do NOT + * fall back to OUTPUT when no writers attached (clear + * `keep_format` to attach a new writer) */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * queue/dequeue the timeout image buffer */ + + /* buffers for OUTPUT and CAPTURE */ + u8 *image; /* pointer to actual buffers data */ + unsigned long image_size; /* number of bytes alloc'd for all buffers */ + struct 
v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + u32 buffer_count; /* should not be big, 4 is a good choice */ + u32 buffer_size; /* number of bytes alloc'd per buffer */ + u32 used_buffer_count; /* number of buffers allocated to openers */ + struct list_head outbufs_list; /* FIFO queue for OUTPUT buffers */ + u32 bufpos2index[MAX_BUFFERS]; /* mapping of `(position % used_buffers)` + * to `buffers[index]` */ + s64 write_position; /* sequence number of last 'displayed' buffer plus + * one */ + + /* synchronization between openers */ + atomic_t open_count; + struct mutex image_mutex; /* mutex for allocating image(s) and + * exchanging format tokens */ + spinlock_t lock; /* lock for the timeout and framerate timers */ + spinlock_t list_lock; /* lock for the OUTPUT buffer queue */ + wait_queue_head_t read_event; + u32 format_tokens; /* tokens to 'set format' for OUTPUT, CAPTURE, or + * timeout buffers */ + u32 stream_tokens; /* tokens to 'start' OUTPUT, CAPTURE, or timeout + * stream */ + + /* sustain framerate */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout */ + u8 *timeout_image; /* copied to outgoing buffers when timeout passes */ + struct v4l2l_buffer timeout_buffer; + u32 timeout_buffer_size; /* number bytes alloc'd for timeout buffer */ + struct timer_list timeout_timer; + int timeout_happened; +}; + +enum v4l2l_io_method { + V4L2L_IO_NONE = 0, + V4L2L_IO_MMAP = 1, + V4L2L_IO_FILE = 2, + V4L2L_IO_TIMEOUT = 3, +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + u32 format_token; /* token (if any) for type used in call to S_FMT or + * REQBUFS */ + u32 stream_token; /* token (if any) for type used in call to STREAMON */ + u32 buffer_count; /* number of buffers (if any) that opener acquired via + * REQBUFS */ + s64 read_position; /* sequence number of the next 'captured' frame */ + unsigned int reread_count; + enum v4l2l_io_method io_method; + + struct v4l2_fh fh; +}; + +#define 
fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +#include "v4l2loopback_formats.h" + +#ifndef V4L2_TYPE_IS_CAPTURE +#define V4L2_TYPE_IS_CAPTURE(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_CAPTURE || \ + (type) == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) +#endif /* V4L2_TYPE_IS_CAPTURE */ +#ifndef V4L2_TYPE_IS_OUTPUT +#define V4L2_TYPE_IS_OUTPUT(type) \ + ((type) == V4L2_BUF_TYPE_VIDEO_OUTPUT || \ + (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) +#endif /* V4L2_TYPE_IS_OUTPUT */ + +/* token values for privilege to set format or start/stop stream */ +#define V4L2L_TOKEN_CAPTURE 0x01 +#define V4L2L_TOKEN_OUTPUT 0x02 +#define V4L2L_TOKEN_TIMEOUT 0x04 +#define V4L2L_TOKEN_MASK \ + (V4L2L_TOKEN_CAPTURE | V4L2L_TOKEN_OUTPUT | V4L2L_TOKEN_TIMEOUT) + +/* helpers for token exchange and token status */ +#define token_from_type(type) \ + (V4L2_TYPE_IS_CAPTURE(type) ? 
/*
 * Unpack a fourcc code into a printable, NUL-terminated string.
 *
 * @fourcc: packed code; the least-significant byte is the first character
 * @buf:    caller-provided storage for four characters plus the terminator
 *
 * Returns @buf so the call can be used directly as a printf-style argument.
 */
static char *fourcc2str(unsigned int fourcc, char buf[5])
{
	unsigned int pos;

	for (pos = 0; pos < 4; pos++)
		buf[pos] = (fourcc >> (8 * pos)) & 0xFF;
	buf[pos] = 0;

	return buf;
}
v4l2_format *fmt, const u32 minwidth, + const u32 maxwidth, const u32 minheight, + const u32 maxheight) +{ + u32 width = fmt->fmt.pix.width, height = fmt->fmt.pix.height; + u32 pixelformat = fmt->fmt.pix.pixelformat; + struct v4l2_format fmt0 = *fmt; + u32 bytesperline = 0, sizeimage = 0; + + if (!width) + width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + if (!height) + height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + width = clamp_val(width, minwidth, maxwidth); + height = clamp_val(height, minheight, maxheight); + + /* sets: width,height,pixelformat,bytesperline,sizeimage */ + if (!(V4L2_TYPE_IS_MULTIPLANAR(fmt0.type))) { + fmt0.fmt.pix.bytesperline = 0; + fmt0.fmt.pix.sizeimage = 0; + } + + if (0) { + ; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + } else if (!v4l2_fill_pixfmt(&fmt0.fmt.pix, pixelformat, width, + height)) { + ; + } else if (!v4l2_fill_pixfmt_mp(&fmt0.fmt.pix_mp, pixelformat, width, + height)) { + ; +#endif + } else { + const struct v4l2l_format *format = + format_by_fourcc(pixelformat); + if (!format) + return -EINVAL; + pix_format_set_size(&fmt0.fmt.pix, format, width, height); + fmt0.fmt.pix.pixelformat = format->fourcc; + } + + if (V4L2_TYPE_IS_MULTIPLANAR(fmt0.type)) { + *fmt = fmt0; + + if ((fmt->fmt.pix_mp.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix_mp.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix_mp.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix_mp.field) + fmt->fmt.pix_mp.field = V4L2_FIELD_NONE; + } else { + bytesperline = fmt->fmt.pix.bytesperline; + sizeimage = fmt->fmt.pix.sizeimage; + + *fmt = fmt0; + + if (!fmt->fmt.pix.bytesperline) + fmt->fmt.pix.bytesperline = bytesperline; + if (!fmt->fmt.pix.sizeimage) + fmt->fmt.pix.sizeimage = sizeimage; + + if ((fmt->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) || + (fmt->fmt.pix.colorspace > V4L2_COLORSPACE_DCI_P3)) + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = 
V4L2_FIELD_NONE; + } + + return 0; +} + +/* Checks if v4l2l_fill_format() has set a valid, fixed sizeimage val. */ +static bool v4l2l_pix_format_has_valid_sizeimage(struct v4l2_format *fmt) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 2, 0) + const struct v4l2_format_info *info; + + info = v4l2_format_info(fmt->fmt.pix.pixelformat); + if (info && info->mem_planes == 1) + return true; +#endif + + return false; +} + +static int pix_format_eq(const struct v4l2_pix_format *ref, + const struct v4l2_pix_format *tgt, int strict) +{ + /* check if the two formats are equivalent. + * ANY fields are handled gracefully + */ +#define _pix_format_eq0(x) \ + if (ref->x != tgt->x) \ + result = 0 +#define _pix_format_eq1(x, def) \ + do { \ + if ((def != tgt->x) && (ref->x != tgt->x)) { \ + printk(KERN_INFO #x " failed"); \ + result = 0; \ + } \ + } while (0) + int result = 1; + _pix_format_eq0(width); + _pix_format_eq0(height); + _pix_format_eq0(pixelformat); + if (!strict) + return result; + _pix_format_eq1(field, V4L2_FIELD_ANY); + _pix_format_eq0(bytesperline); + _pix_format_eq0(sizeimage); + _pix_format_eq1(colorspace, V4L2_COLORSPACE_DEFAULT); + return result; +} + +static void set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if (!tpf->denominator && !tpf->numerator) { + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_DEFAULT; + } else if (tpf->numerator > + V4L2LOOPBACK_FRAME_INTERVAL_MAX * tpf->denominator) { + /* divide-by-zero or greater than maximum interval => min FPS */ + tpf->numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + tpf->denominator = 1; + } else if (tpf->numerator * V4L2LOOPBACK_FPS_MAX < tpf->denominator) { + /* zero or lower than minimum interval => max FPS */ + tpf->numerator = 1; + tpf->denominator = V4L2LOOPBACK_FPS_MAX; + } + + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = + max(1UL, (msecs_to_jiffies(1000) * tpf->numerator) / + tpf->denominator); +} + +static struct v4l2_loopback_device 
*v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. "YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || (has_no_owners(dev) && !dev->keep_format)) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%u", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%u/%u", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%ux%u@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + if (!dev) + return -ENODEV; + + /* only fps changing is supported */ + if (sscanf(buf, "@%u/%u", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + set_timeperframe(dev, &f); + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + return sprintf(buf, "%u\n", dev->used_buffer_count); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + 
return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + if (!dev) + return -ENODEV; + + if (dev->max_openers == curr) + return len; + + if (curr > __INT_MAX__ || dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static ssize_t attr_show_state(struct device *cd, struct device_attribute *attr, + char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + if (!dev) + return -ENODEV; + + if (!has_output_token(dev->stream_tokens) || dev->keep_format) { + return sprintf(buf, "capture\n"); + } else + return sprintf(buf, "output\n"); + + return -EAGAIN; +} + +static DEVICE_ATTR(state, S_IRUGO, attr_show_state, NULL); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + V4L2_SYSFS_DESTROY(state); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + V4L2_SYSFS_CREATE(state); + /* ... 
*/ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* Event APIs */ + +#define V4L2LOOPBACK_EVENT_BASE (V4L2_EVENT_PRIVATE_START) +#define V4L2LOOPBACK_EVENT_OFFSET 0x08E00000 +#define V4L2_EVENT_PRI_CLIENT_USAGE \ + (V4L2LOOPBACK_EVENT_BASE + V4L2LOOPBACK_EVENT_OFFSET + 1) + +struct v4l2_event_client_usage { + __u32 count; +}; + +/* global module data */ +/* find a device based on it's device-number (e.g. '3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +#define v4l2loopback_get_vdev_nr(vdev) \ + ((struct v4l2loopback_private *)video_get_drvdata(vdev))->device_nr +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + int device_nr = v4l2loopback_get_vdev_nr(loopdev); + + return idr_find(&v4l2loopback_index_idr, device_nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct v4l2loopback_private *ptr = video_drvdata(f); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void client_usage_queue_event(struct video_device *vdev); +static bool 
/* V4L2 ioctl caps and params calls */
/* returns device capabilities
 * called on VIDIOC_QUERYCAP
 *
 * Fills in the driver name, the card label and a bus_info string built from
 * the device number, then reports the capability set:
 *  - STREAMING and READWRITE are always announced
 *  - with announce_all_caps, both VIDEO_CAPTURE and VIDEO_OUTPUT are added
 *  - otherwise exactly one of the two is added, depending on the state of
 *    the opener and the device (see below)
 * Always returns 0.
 */
static int vidioc_querycap(struct file *file, void *fh,
			   struct v4l2_capability *cap)
{
	struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file);
	struct v4l2_loopback_opener *opener = fh_to_opener(fh);
	int device_nr = v4l2loopback_get_vdev_nr(dev->vdev);
	__u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE;

	strscpy(cap->driver, "v4l2 loopback", sizeof(cap->driver));
	snprintf(cap->card, sizeof(cap->card), "%s", dev->card_label);
	snprintf(cap->bus_info, sizeof(cap->bus_info),
		 "platform:v4l2loopback-%03d", device_nr);

	if (dev->announce_all_caps) {
		capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT;
	} else {
		/*
		 * OUTPUT is announced to an opener in timeout-image mode, or
		 * while the OUTPUT stream token is still held by the device
		 * (i.e. not acquired by any opener) and the format is not
		 * locked by keep_format; everything else sees CAPTURE.
		 */
		if (opener->io_method == V4L2L_IO_TIMEOUT ||
		    (has_output_token(dev->stream_tokens) &&
		     !dev->keep_format)) {
			capabilities |= V4L2_CAP_VIDEO_OUTPUT;
		} else
			capabilities |= V4L2_CAP_VIDEO_CAPTURE;
	}

	/*
	 * One chained assignment: on kernels >= 4.7 the preprocessor prepends
	 * the video_device's device_caps as an additional target, so the
	 * statement order here must not be changed.
	 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
	dev->vdev->device_caps =
#endif /* >=linux-4.7.0 */
		cap->device_caps = cap->capabilities = capabilities;

	/* only `capabilities` carries the DEVICE_CAPS marker */
	cap->capabilities |= V4L2_CAP_DEVICE_CAPS;

	memset(cap->reserved, 0, sizeof(cap->reserved));
	return 0;
}
+{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* only current frame size supported */ + if (argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* return continuous sizes if pixel format is supported */ + if (NULL == format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + if (dev->min_width == dev->max_width && + dev->min_height == dev->max_height) { + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->min_width; + argp->discrete.height = dev->min_height; + } else { + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = dev->min_width; + argp->stepwise.min_height = dev->min_height; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + } + return 0; +} + +/* Test if the device is currently 'capable' of the buffer (stream) type when + * the `exclusive_caps` parameter is set. `keep_format` should lock the format + * and prevent free of buffers */ +static int check_buffer_capability(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener, + enum v4l2_buf_type type) +{ + /* short-circuit for (non-compliant) timeout image mode */ + if (opener->io_method == V4L2L_IO_TIMEOUT) + return 0; + if (dev->announce_all_caps) + return (type == V4L2_BUF_TYPE_VIDEO_CAPTURE || + type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ? + 0 : + -EINVAL; + /* CAPTURE if opener has a capture format or a writer is streaming; + * else OUTPUT. 
*/ + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (!(has_capture_token(opener->format_token) || + !has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(has_output_token(opener->format_token) || + has_output_token(dev->stream_tokens))) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + if (dev->keep_format || has_other_owners(opener, dev)) { + /* keep_format also locks the frame rate */ + if (argp->width != dev->pix_format.width || + argp->height != dev->pix_format.height || + argp->pixel_format != dev->pix_format.pixelformat) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + } else { + if (argp->width < dev->min_width || + argp->width > dev->max_width || + argp->height < dev->min_height || + argp->height > dev->max_height || + !format_by_fourcc(argp->pixel_format)) + return -EINVAL; + + argp->type = V4L2_FRMIVAL_TYPE_CONTINUOUS; + argp->stepwise.min.numerator = 1; + argp->stepwise.min.denominator = V4L2LOOPBACK_FPS_MAX; + argp->stepwise.max.numerator = V4L2LOOPBACK_FRAME_INTERVAL_MAX; + argp->stepwise.max.denominator = 1; + argp->stepwise.step.numerator = 1; + argp->stepwise.step.denominator = 1; + } + + return 0; +} + +/* Enumerate device formats + * Returns: + * - EINVAL the index is out of bounds; or if non-zero when format is fixed + * - EFAULT unexpected null pointer */ +static int vidioc_enum_fmt_vid(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev = 
v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + int fixed = dev->keep_format || has_other_owners(opener, dev); + const struct v4l2l_format *fmt; + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + + if (!(f->index < FORMATS)) + return -EINVAL; + /* TODO: Support 6.14 V4L2_FMTDESC_FLAG_ENUM_ALL */ + if (fixed && f->index) + return -EINVAL; + + fmt = fixed ? format_by_fourcc(dev->pix_format.pixelformat) : + &formats[f->index]; + if (!fmt) + return -EFAULT; + + f->flags = 0; + if (fmt->flags & FORMAT_FLAGS_COMPRESSED) + f->flags |= V4L2_FMT_FLAG_COMPRESSED; + snprintf(f->description, sizeof(f->description), fmt->name); + f->pixelformat = fmt->fourcc; + return 0; +} + +/* Tests (or tries) the format. + * Returns: + * - EINVAL if the buffer type or format is not supported + */ +static int vidioc_try_fmt_vid(struct file *file, void *fh, + struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + if (v4l2l_fill_format(f, dev->min_width, dev->max_width, + dev->min_height, dev->max_height) != 0) + return -EINVAL; + if (dev->keep_format || has_other_owners(opener, dev)) + /* use existing format - including colorspace info */ + f->fmt.pix = dev->pix_format; + + return 0; +} + +/* Sets new format. Fills 'f' argument with the requested or existing format. + * Side-effect: buffers are allocated for the (returned) format. + * Returns: + * - EINVAL if the type is not supported + * - EBUSY if buffers are already allocated + * TODO: (vasaka) set subregions of input + */ +static int vidioc_s_fmt_vid(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? 
+ V4L2L_TOKEN_TIMEOUT : + token_from_type(f->type); + int changed, result; + char buf[5]; + + result = vidioc_try_fmt_vid(file, fh, f); + if (result < 0) + return result; + + if (opener->buffer_count > 0) + /* must free buffers before format can be set */ + return -EBUSY; + + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (opener->format_token) + release_token(dev, opener, format); + if (!(dev->format_tokens & token)) { + result = -EBUSY; + goto exit_s_fmt_unlock; + } + + dprintk("S_FMT[%s] %4s:%ux%u size=%u\n", + V4L2_TYPE_IS_CAPTURE(f->type) ? "CAPTURE" : "OUTPUT", + fourcc2str(f->fmt.pix.pixelformat, buf), f->fmt.pix.width, + f->fmt.pix.height, f->fmt.pix.sizeimage); + changed = !pix_format_eq(&dev->pix_format, &f->fmt.pix, 0); + if (changed || has_no_owners(dev)) { + result = allocate_buffers(dev, &f->fmt.pix); + if (result < 0) + goto exit_s_fmt_unlock; + } + if ((dev->timeout_image && changed) || + (!dev->timeout_image && need_timeout_buffer(dev, token))) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_s_fmt_free; + } + if (changed) { + dev->pix_format = f->fmt.pix; + dev->pix_format_has_valid_sizeimage = + v4l2l_pix_format_has_valid_sizeimage(f); + } + acquire_token(dev, opener, format, token); + if (opener->io_method == V4L2L_IO_TIMEOUT) + dev->timeout_image_io = 0; + goto exit_s_fmt_unlock; +exit_s_fmt_free: + free_buffers(dev); +exit_s_fmt_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +/* ------------------ CAPTURE ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_CAPTURE */ + +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener 
*opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_cap(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +/* ------------------ OUTPUT ----------------------- */ +/* ioctl for VIDIOC_ENUM_FMT, _G_FMT, _S_FMT, and _TRY_FMT when buffer type + * is V4L2_BUF_TYPE_VIDEO_OUTPUT */ + +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + return vidioc_enum_fmt_vid(file, fh, f); +} + +static int vidioc_g_fmt_out(struct file *file, void *fh, struct v4l2_format *f) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, f->type) < 0) + return -EINVAL; + /* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + f->fmt.pix = dev->pix_format; + return 0; +} + +static int vidioc_try_fmt_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + return vidioc_try_fmt_vid(file, fh, f); +} + +static int vidioc_s_fmt_out(struct file *file, void *fh, struct v4l2_format *f) +{ + return vidioc_s_fmt_vid(file, fh, f); +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY ----------------------- */ +/* currently unsupported */ +/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it, if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int 
vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_G_PARM + */ +static int vidioc_g_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + /* do not care about type of opener, hope these enums would always be + * compatible */ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + parm->parm.capture = dev->capture_param; + return 0; +} + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_S_PARM + */ +static int vidioc_s_parm(struct file *file, void *fh, + struct v4l2_streamparm *parm) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + dprintk("S_PARM(frame-time=%u/%u)\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + if (check_buffer_capability(dev, opener, parm->type) < 0) + return -EINVAL; + + switch (parm->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + set_timeperframe(dev, &parm->parm.capture.timeperframe); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + set_timeperframe(dev, &parm->parm.output.timeperframe); + break; + default: + return -EINVAL; + } + + parm->parm.capture = dev->capture_param; + return 0; +} + +#ifdef V4L2LOOPBACK_WITH_STD +/* sets a tv standard, actually we do not need to handle this any special way + * added to support effecttv + * called on VIDIOC_S_STD + */ +static int vidioc_s_std(struct file *file, void *fh, 
v4l2_std_id *_std) +{ + v4l2_std_id req_std = 0, supported_std = 0; + const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; + + if (_std) { + req_std = *_std; + *_std = all_std; + } + + /* we support everything in V4L2_STD_ALL, but not more... */ + supported_std = (all_std & req_std); + if (no_std == supported_std) + return -EINVAL; + + return 0; +} + +/* gets a fake video standard + * called on VIDIOC_G_STD + */ +static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +/* gets a fake video standard + * called on VIDIOC_QUERYSTD + */ +static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +#endif /* V4L2LOOPBACK_WITH_STD */ + +static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, + s64 val) +{ + int result = 0; + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + if (!dev->keep_format) { + if (has_no_owners(dev) && !any_buffers_mapped(dev)) + free_buffers(dev); + } + mutex_unlock(&dev->image_mutex); + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + if (val > 0) { + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + /* on-the-fly allocate if device is owned; else + * allocate occurs on next S_FMT or REQBUFS */ + if (!has_no_owners(dev)) + result = allocate_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + if (result < 0) { + /* disable timeout as buffer not alloc'd */ + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = 0; + spin_unlock_bh(&dev->lock); + return result; + } + } + 
spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT_IMAGE_IO: + dev->timeout_image_io = 1; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strscpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + if (index) + *index = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, 
unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -ENOTTY; + return index == 0 ? index : -EINVAL; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + __u32 index = inp->index; + + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; + if (index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strscpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + /* if no outputs attached; pretend device is powered off */ + inp->status |= V4L2_IN_ST_NO_SIGNAL; + + return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + if (index) + *index = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, 
void *fh, unsigned int index) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + if (index != 0) + return -EINVAL; + if (check_buffer_capability(dev, opener, V4L2_BUF_TYPE_VIDEO_CAPTURE)) + return -ENOTTY; /* NOTE: -EAGAIN might be more informative */ + return 0; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +#define is_allocated(opener, type, index) \ + (opener->format_token & (opener->io_method == V4L2L_IO_TIMEOUT ? \ + V4L2L_TOKEN_TIMEOUT : \ + token_from_type(type)) && \ + (index) < (opener)->buffer_count) +#define BUFFER_DEBUG_FMT_STR \ + "buffer#%u @ %p type=%u bytesused=%u length=%u flags=%x " \ + "field=%u timestamp= %lld.%06lldsequence=%u\n" +#define BUFFER_DEBUG_FMT_ARGS(buf) \ + (buf)->index, (buf), (buf)->type, (buf)->bytesused, (buf)->length, \ + (buf)->flags, (buf)->field, \ + (long long)(buf)->timestamp.tv_sec, \ + (long long)(buf)->timestamp.tv_usec, (buf)->sequence +/* Buffer flag helpers */ +#define unset_flags(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_queued(flags) \ + do { \ + flags |= V4L2_BUF_FLAG_QUEUED; \ + flags &= ~V4L2_BUF_FLAG_DONE; \ + } while (0) +#define set_done(flags) \ + do { \ + flags &= ~V4L2_BUF_FLAG_QUEUED; \ + flags |= V4L2_BUF_FLAG_DONE; \ + } while (0) + +static bool any_buffers_mapped(struct v4l2_loopback_device *dev) +{ + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + if (dev->buffers[index].buffer.flags & V4L2_BUF_FLAG_MAPPED) + return true; + return false; +} + +static void prepare_buffer_queue(struct v4l2_loopback_device *dev, int count) +{ + struct v4l2l_buffer *bufd, *n; + u32 pos; + + spin_lock_bh(&dev->list_lock); + + /* ensure sufficient number of buffers in queue */ + for (pos = 0; pos < count; ++pos) { + bufd = &dev->buffers[pos]; + if (list_empty(&bufd->list_head)) + list_add_tail(&bufd->list_head, 
&dev->outbufs_list); + } + if (list_empty(&dev->outbufs_list)) + goto exit_prepare_queue_unlock; + + /* remove any excess buffers */ + list_for_each_entry_safe(bufd, n, &dev->outbufs_list, list_head) { + if (bufd->buffer.index >= count) + list_del_init(&bufd->list_head); + } + + /* buffers are no longer queued; and `write_position` will correspond + * to the first item of `outbufs_list`. */ + pos = v4l2l_mod64(dev->write_position, count); + list_for_each_entry(bufd, &dev->outbufs_list, list_head) { + unset_flags(bufd->buffer.flags); + dev->bufpos2index[pos % count] = bufd->buffer.index; + ++pos; + } +exit_prepare_queue_unlock: + spin_unlock_bh(&dev->list_lock); +} + +/* forward declaration */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type); +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *reqbuf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = opener->io_method == V4L2L_IO_TIMEOUT ? 
+ V4L2L_TOKEN_TIMEOUT : + token_from_type(reqbuf->type); + u32 req_count = reqbuf->count; + int result = 0; + + dprintk("REQBUFS(memory=%u, req_count=%u) and device-bufs=%u/%u " + "[used/max]\n", + reqbuf->memory, req_count, dev->used_buffer_count, + dev->buffer_count); + + switch (reqbuf->memory) { + case V4L2_MEMORY_MMAP: +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) + reqbuf->capabilities = 0; /* only guarantee MMAP support */ +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + reqbuf->flags = 0; /* no memory consistency support */ +#endif + break; + default: + return -EINVAL; + } + + if (opener->format_token & ~token) + /* different (buffer) type already assigned to descriptor by + * S_FMT or REQBUFS */ + return -EINVAL; + + MARK(); + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; /* -EINTR */ + + /* CASE queue/dequeue timeout-buffer only: */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + opener->buffer_count = req_count; + if (req_count == 0) + release_token(dev, opener, format); + goto exit_reqbufs_unlock; + } + + MARK(); + /* CASE count is zero: streamoff, free buffers, release their token */ + if (req_count == 0) { + if (dev->format_tokens & token) { + acquire_token(dev, opener, format, token); + opener->io_method = V4L2L_IO_MMAP; + } + result = vidioc_streamoff(file, fh, reqbuf->type); + opener->buffer_count = 0; + /* undocumented requirement - REQBUFS with count zero should + * ALSO release lock on logical stream */ + if (opener->format_token) + release_token(dev, opener, format); + if (has_no_owners(dev)) + dev->used_buffer_count = 0; + goto exit_reqbufs_unlock; + } + + /* CASE count non-zero: allocate buffers and acquire token for them */ + MARK(); + switch (reqbuf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (!(dev->format_tokens & token || + opener->format_token & token)) + /* only exclusive ownership for each stream */ + result = -EBUSY; + break; + 
default: + result = -EINVAL; + } + if (result < 0) + goto exit_reqbufs_unlock; + + if (has_other_owners(opener, dev) && dev->used_buffer_count > 0) { + /* allow 'allocation' of existing number of buffers */ + req_count = dev->used_buffer_count; + } else if (any_buffers_mapped(dev)) { + /* do not allow re-allocation if buffers are mapped */ + result = -EBUSY; + goto exit_reqbufs_unlock; + } + + MARK(); + opener->buffer_count = 0; + + if (req_count > dev->buffer_count) + req_count = dev->buffer_count; + + if (has_no_owners(dev)) { + result = allocate_buffers(dev, &dev->pix_format); + if (result < 0) + goto exit_reqbufs_unlock; + } + if (!dev->timeout_image && need_timeout_buffer(dev, token)) { + result = allocate_timeout_buffer(dev); + if (result < 0) + goto exit_reqbufs_unlock; + } + acquire_token(dev, opener, format, token); + + MARK(); + switch (opener->io_method) { + case V4L2L_IO_TIMEOUT: + dev->timeout_image_io = 0; + opener->buffer_count = req_count; + break; + default: + opener->io_method = V4L2L_IO_MMAP; + prepare_buffer_queue(dev, req_count); + dev->used_buffer_count = opener->buffer_count = req_count; + } +exit_reqbufs_unlock: + mutex_unlock(&dev->image_mutex); + reqbuf->count = opener->buffer_count; + return result; +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 type = buf->type; + u32 index = buf->index; + + if ((type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) + return -EINVAL; + if (!is_allocated(opener, type, index)) + return -EINVAL; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->index = index; + } else + *buf = 
dev->buffers[index].buffer; + + buf->type = type; + + if (!(buf->flags & (V4L2_BUF_FLAG_DONE | V4L2_BUF_FLAG_QUEUED))) { + /* v4l2-compliance requires these to be zero */ + buf->sequence = 0; + buf->timestamp.tv_sec = buf->timestamp.tv_usec = 0; + } else if (V4L2_TYPE_IS_CAPTURE(type)) { + /* guess flags based on sequence values */ + if (buf->sequence >= opener->read_position) { + set_done(buf->flags); + } else if (buf->flags & V4L2_BUF_FLAG_DONE) { + set_queued(buf->flags); + } + } + dprintkrw("QUERYBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); + + spin_lock_bh(&dev->list_lock); + list_move_tail(&buf->list_head, &dev->outbufs_list); + spin_unlock_bh(&dev->list_lock); + + spin_lock_bh(&dev->lock); + dev->bufpos2index[v4l2l_mod64(dev->write_position, + dev->used_buffer_count)] = + buf->buffer.index; + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + struct v4l2l_buffer *bufd; + u32 index = buf->index; + u32 type = buf->type; + + if (!is_allocated(opener, type, index)) + return -EINVAL; + bufd = &dev->buffers[index]; + + switch (buf->memory) { + case V4L2_MEMORY_MMAP: + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_MAPPED)) + dprintkrw("QBUF() unmapped buffer [index=%u]\n", index); + break; + default: + return -EINVAL; + } + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + set_queued(buf->flags); + return 0; + } + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + 
dprintkrw("QBUF(CAPTURE, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + set_queued(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw("QBUF(OUTPUT, index=%u) -> " BUFFER_DEBUG_FMT_STR, + index, BUFFER_DEBUG_FMT_ARGS(buf)); + if (!(bufd->buffer.flags & V4L2_BUF_FLAG_TIMESTAMP_COPY) && + (buf->timestamp.tv_sec == 0 && + buf->timestamp.tv_usec == 0)) { + v4l2l_get_timestamp(&bufd->buffer); + } else { + bufd->buffer.timestamp = buf->timestamp; + bufd->buffer.flags |= V4L2_BUF_FLAG_TIMESTAMP_COPY; + bufd->buffer.flags &= + ~V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; + } + if (dev->pix_format_has_valid_sizeimage) { + if (buf->bytesused >= dev->pix_format.sizeimage) { + bufd->buffer.bytesused = + dev->pix_format.sizeimage; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + dev_warn_ratelimited( + &dev->vdev->dev, +#else + dprintkrw( +#endif + "warning queued output buffer bytesused too small %u < %u\n", + buf->bytesused, + dev->pix_format.sizeimage); + bufd->buffer.bytesused = buf->bytesused; + } + } else { + bufd->buffer.bytesused = buf->bytesused; + } + bufd->buffer.sequence = dev->write_position; + set_queued(bufd->buffer.flags); + *buf = bufd->buffer; + buffer_written(dev, bufd); + set_done(bufd->buffer.flags); + wake_up_all(&dev->read_event); + break; + default: + return -EINVAL; + } + buf->type = type; + return 0; +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int pos, timeout_happened; + u32 index; + + if ((file->f_flags & O_NONBLOCK) 
&& + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = v4l2l_mod64(opener->read_position + + dev->used_buffer_count - 1, + dev->used_buffer_count); + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffer_count) + opener->read_position = dev->write_position - 1; + pos = v4l2l_mod64(opener->read_position, + dev->used_buffer_count); + ++opener->read_position; + } + timeout_happened = dev->timeout_happened && (dev->timeout_jiffies > 0); + dev->timeout_happened = 0; + spin_unlock_bh(&dev->lock); + + index = dev->bufpos2index[pos]; + if (timeout_happened) { + if (index >= dev->used_buffer_count) { + dprintkrw("get_capture_buffer() read position is at " + "an unallocated buffer [index=%u]\n", + index); + return -EFAULT; + } + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[index].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return (int)index; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 type = buf->type; + int index; + struct v4l2l_buffer *bufd; + + if (buf->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) { + *buf = dev->timeout_buffer.buffer; + buf->type = type; + unset_flags(buf->flags); + return 0; + } + if 
((opener->buffer_count == 0) || + !(opener->format_token & token_from_type(type))) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + *buf = dev->buffers[index].buffer; + unset_flags(buf->flags); + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + spin_lock_bh(&dev->list_lock); + + bufd = list_first_entry_or_null(&dev->outbufs_list, + struct v4l2l_buffer, list_head); + if (bufd) + list_move_tail(&bufd->list_head, &dev->outbufs_list); + + spin_unlock_bh(&dev->list_lock); + if (!bufd) + return -EFAULT; + unset_flags(bufd->buffer.flags); + *buf = bufd->buffer; + break; + default: + return -EINVAL; + } + + buf->type = type; + dprintkrw("DQBUF(%s, index=%u) -> " BUFFER_DEBUG_FMT_STR, + V4L2_TYPE_IS_CAPTURE(type) ? "CAPTURE" : "OUTPUT", index, + BUFFER_DEBUG_FMT_ARGS(buf)); + return 0; +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* opener must have claimed (same) buffer set via REQBUFS */ + if (!opener->buffer_count || !(opener->format_token & token)) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (has_output_token(dev->stream_tokens) && !dev->keep_format) + return -EIO; + if (dev->stream_tokens & token) { + acquire_token(dev, opener, stream, token); + client_usage_queue_event(dev->vdev); + } + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->stream_tokens & token) + acquire_token(dev, opener, stream, token); + return 0; + default: + return -EINVAL; + } +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static 
int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + u32 token = token_from_type(type); + + /* short-circuit when using timeout buffer set */ + if (opener->format_token & V4L2L_TOKEN_TIMEOUT) + return 0; + /* short-circuit when buffer set has no owner */ + if (dev->format_tokens & token) + return 0; + /* opener needs a claim to buffer set */ + if (!opener->format_token) + return -EBUSY; + if (opener->format_token & ~token) + return -EINVAL; + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (opener->stream_token & token) + release_token(dev, opener, stream); + /* reset output queue */ + if (dev->used_buffer_count > 0) + prepare_buffer_queue(dev, dev->used_buffer_count); + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if (opener->stream_token & token) { + release_token(dev, opener, stream); + client_usage_queue_event(dev->vdev); + } + return 0; + default: + return -EINVAL; + } +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffer_count; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static void client_usage_queue_event(struct video_device *vdev) +{ + struct v4l2_event ev; + struct v4l2_loopback_device *dev; + + dev = container_of(vdev->v4l2_dev, struct v4l2_loopback_device, + v4l2_dev); + + memset(&ev, 0, sizeof(ev)); + ev.type = V4L2_EVENT_PRI_CLIENT_USAGE; + ((struct v4l2_event_client_usage *)&ev.u)->count = + !has_capture_token(dev->stream_tokens); + + v4l2_event_queue(vdev, &ev); +} + +static int client_usage_ops_add(struct v4l2_subscribed_event *sev, + unsigned elems) +{ + if (!(sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) + return 0; + + 
client_usage_queue_event(sev->fh->vdev); + return 0; +} + +static void client_usage_ops_replace(struct v4l2_event *old, + const struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&old->u) = + *((struct v4l2_event_client_usage *)&new->u); +} + +static void client_usage_ops_merge(const struct v4l2_event *old, + struct v4l2_event *new) +{ + *((struct v4l2_event_client_usage *)&new->u) = + *((struct v4l2_event_client_usage *)&old->u); +} + +const struct v4l2_subscribed_event_ops client_usage_ops = { + .add = client_usage_ops_add, + .replace = client_usage_ops_replace, + .merge = client_usage_ops_merge, +}; + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + case V4L2_EVENT_PRI_CLIENT_USAGE: + return v4l2_event_subscribe(fh, sub, 0, &client_usage_ops); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + atomic_inc(&buf->use_count); + buf->buffer.flags |= V4L2_BUF_FLAG_MAPPED; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + if (atomic_dec_and_test(&buf->use_count)) + buf->buffer.flags &= ~V4L2_BUF_FLAG_MAPPED; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + u8 *addr; + unsigned long start, size, offset; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + struct v4l2l_buffer *buffer = NULL; + int result = 0; + MARK(); + + offset = (unsigned long)vma->vm_pgoff << PAGE_SHIFT; + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); /* always != 0 */ + + /* 
ensure buffer size, count, and allocated image(s) are not altered by + * other file descriptors */ + result = mutex_lock_killable(&dev->image_mutex); + if (result < 0) + return result; + + if (size > dev->buffer_size) { + dprintk("mmap() attempt to map %lubytes when %ubytes are " + "allocated to buffers\n", + size, dev->buffer_size); + result = -EINVAL; + goto exit_mmap_unlock; + } + if (offset % dev->buffer_size != 0) { + dprintk("mmap() offset does not match start of any buffer\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + switch (opener->format_token) { + case V4L2L_TOKEN_TIMEOUT: + if (offset != (unsigned long)dev->buffer_size * MAX_BUFFERS) { + dprintk("mmap() incorrect offset for timeout image\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + buffer = &dev->timeout_buffer; + addr = dev->timeout_image; + break; + default: + if (offset >= dev->image_size) { + dprintk("mmap() attempt to map beyond all buffers\n"); + result = -EINVAL; + goto exit_mmap_unlock; + } + u32 index = offset / dev->buffer_size; + buffer = &dev->buffers[index]; + addr = dev->image + offset; + break; + } + + while (size > 0) { + struct page *page = vmalloc_to_page(addr); + + result = vm_insert_page(vma, start, page); + if (result < 0) + goto exit_mmap_unlock; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + + vm_open(vma); +exit_mmap_unlock: + mutex_unlock(&dev->image_mutex); + return result; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + + /* call poll_wait in first call, regardless, to ensure that the + * wait-queue is not null */ + poll_wait(file, &dev->read_event, pts); + poll_wait(file, &opener->fh.wait, pts); + + if 
(req_events & POLLPRI) { + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->format_token) { + case V4L2L_TOKEN_OUTPUT: + if (opener->stream_token != 0 || + opener->io_method == V4L2L_IO_NONE) + ret_mask |= POLLOUT | POLLWRNORM; + break; + case V4L2L_TOKEN_CAPTURE: + if ((opener->io_method == V4L2L_IO_NONE || + opener->stream_token != 0) && + can_read(dev, opener)) + ret_mask |= POLLIN | POLLWRNORM; + break; + case V4L2L_TOKEN_TIMEOUT: + ret_mask |= POLLOUT | POLLWRNORM; + break; + default: + break; + } + + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + atomic_inc(&dev->open_count); + if (dev->timeout_image_io && dev->format_tokens & V4L2L_TOKEN_TIMEOUT) + /* will clear timeout_image_io once buffer set acquired */ + opener->io_method = V4L2L_IO_TIMEOUT; + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + + v4l2_fh_add(&opener->fh); + dprintk("open() -> dev@%p with image@%p\n", dev, + dev ? dev->image : NULL); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int result = 0; + dprintk("close() -> dev@%p with image@%p\n", dev, + dev ? 
dev->image : NULL); + + if (opener->format_token) { + struct v4l2_requestbuffers reqbuf = { + .count = 0, .memory = V4L2_MEMORY_MMAP, .type = 0 + }; + switch (opener->format_token) { + case V4L2L_TOKEN_CAPTURE: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + break; + case V4L2L_TOKEN_OUTPUT: + case V4L2L_TOKEN_TIMEOUT: + reqbuf.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + break; + } + if (reqbuf.type) + result = vidioc_reqbufs(file, file->private_data, + &reqbuf); + if (result < 0) + dprintk("failed to free buffers REQBUFS(count=0) " + " returned %d\n", + result); + mutex_lock(&dev->image_mutex); + release_token(dev, opener, format); + mutex_unlock(&dev->image_mutex); + } + + if (atomic_dec_and_test(&dev->open_count)) { + timer_delete_sync(&dev->sustain_timer); + timer_delete_sync(&dev->timeout_timer); + if (!dev->keep_format) { + mutex_lock(&dev->image_mutex); + free_buffers(dev); + mutex_unlock(&dev->image_mutex); + } + } + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + return 0; +} + +static int start_fileio(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + struct v4l2_requestbuffers reqbuf = { .count = dev->buffer_count, + .memory = V4L2_MEMORY_MMAP, + .type = type }; + int token = token_from_type(type); + int result; + + if (opener->format_token & V4L2L_TOKEN_TIMEOUT || + opener->format_token & ~token) + return -EBUSY; /* NOTE: -EBADF might be more informative */ + + /* short-circuit if already have stream token */ + if (opener->stream_token && opener->io_method == V4L2L_IO_FILE) + return 0; + + /* otherwise attempt to acquire stream token and assign IO method */ + if (!(dev->stream_tokens & token) || opener->io_method != V4L2L_IO_NONE) + return -EBUSY; + + result = vidioc_reqbufs(file, fh, &reqbuf); + if (result < 0) + return result; + result = vidioc_streamon(file, fh, type); + if (result < 0) + return 
result; + + opener->io_method = V4L2L_IO_FILE; + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("read() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_CAPTURE); + if (result < 0) + return result; + + index = get_capture_buffer(file); + if (index < 0) + return index; + b = &dev->buffers[index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR "v4l2-loopback read() failed copy_to_user()\n"); + return -EFAULT; + } + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_buffer *b; + int index, result; + + dprintkrw("write() %zu bytes\n", count); + result = start_fileio(file, file->private_data, + V4L2_BUF_TYPE_VIDEO_OUTPUT); + if (result < 0) + return result; + + if (count > dev->buffer_size) + count = dev->buffer_size; + index = v4l2l_mod64(dev->write_position, dev->used_buffer_count); + b = &dev->buffers[index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback write() failed copy_from_user()\n"); + return -EFAULT; + } + b->bytesused = count; + + v4l2l_get_timestamp(b); + b->sequence = dev->write_position; + set_queued(b->flags); + buffer_written(dev, &dev->buffers[index]); + set_done(b->flags); + wake_up_all(&dev->read_event); + + return count; +} + +/* init functions */ +/* frees buffers, if allocated */ +static void free_buffers(struct v4l2_loopback_device *dev) +{ + dprintk("free_buffers() with image@%p\n", dev->image); + if (!dev->image) + return; + if (!has_no_owners(dev) || 
any_buffers_mapped(dev)) + /* maybe an opener snuck in before image_mutex was acquired */ + printk(KERN_WARNING + "v4l2-loopback free_buffers() buffers of video device " + "#%u freed while still mapped to userspace\n", + dev->vdev->num); + vfree(dev->image); + dev->image = NULL; + dev->image_size = 0; + dev->buffer_size = 0; +} + +static void free_timeout_buffer(struct v4l2_loopback_device *dev) +{ + dprintk("free_timeout_buffer() with timeout_image@%p\n", + dev->timeout_image); + if (!dev->timeout_image) + return; + + if ((dev->timeout_jiffies > 0 && !has_no_owners(dev)) || + dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + printk(KERN_WARNING + "v4l2-loopback free_timeout_buffer() timeout image " + "of device #%u freed while still mapped to userspace\n", + dev->vdev->num); + + vfree(dev->timeout_image); + dev->timeout_image = NULL; + dev->timeout_buffer_size = 0; +} +/* allocates buffers if no (other) openers are already using them */ +static int allocate_buffers(struct v4l2_loopback_device *dev, + struct v4l2_pix_format *pix_format) +{ + u32 buffer_size = PAGE_ALIGN(pix_format->sizeimage); + unsigned long image_size = + (unsigned long)buffer_size * (unsigned long)dev->buffer_count; + /* vfree on close file operation in case no open handles left */ + + if (buffer_size == 0 || dev->buffer_count == 0 || + buffer_size < pix_format->sizeimage) + return -EINVAL; + + if ((__LONG_MAX__ / buffer_size) < dev->buffer_count) + return -ENOSPC; + + dprintk("allocate_buffers() size %lubytes = %ubytes x %ubuffers\n", + image_size, buffer_size, dev->buffer_count); + if (dev->image) { + /* check that no buffers are expected in user-space */ + if (!has_no_owners(dev) || any_buffers_mapped(dev)) + return -EBUSY; + dprintk("allocate_buffers() existing size=%lubytes\n", + dev->image_size); + /* FIXME: prevent double allocation more intelligently! 
*/ + if (image_size == dev->image_size) { + dprintk("allocate_buffers() keep existing\n"); + return 0; + } + free_buffers(dev); + } + + /* FIXME: set buffers to 0 */ + dev->image = vmalloc(image_size); + if (dev->image == NULL) { + dev->buffer_size = dev->image_size = 0; + return -ENOMEM; + } + init_buffers(dev, pix_format->sizeimage, buffer_size); + dev->buffer_size = buffer_size; + dev->image_size = image_size; + dprintk("allocate_buffers() -> vmalloc'd %lubytes\n", dev->image_size); + return 0; +} +static int allocate_timeout_buffer(struct v4l2_loopback_device *dev) +{ + /* device's `buffer_size` and `buffers` must be initialised in + * allocate_buffers() */ + + dprintk("allocate_timeout_buffer() size %ubytes\n", dev->buffer_size); + if (dev->buffer_size == 0) + return -EINVAL; + + if (dev->timeout_image) { + if (dev->timeout_buffer.buffer.flags & V4L2_BUF_FLAG_MAPPED) + return -EBUSY; + if (dev->buffer_size == dev->timeout_buffer_size) + return 0; + free_timeout_buffer(dev); + } + + dev->timeout_image = vzalloc(dev->buffer_size); + if (!dev->timeout_image) { + dev->timeout_buffer_size = 0; + return -ENOMEM; + } + dev->timeout_buffer_size = dev->buffer_size; + return 0; +} +/* init inner buffers, they are capture mode and flags are set as for capture + * mode buffers */ +static void init_buffers(struct v4l2_loopback_device *dev, u32 bytes_used, + u32 buffer_size) +{ + u32 i; + + for (i = 0; i < dev->buffer_count; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytes_used; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_buffer = dev->buffers[0]; + dev->timeout_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; +} + +/* fills and register video device */ +static void 
init_vdev(struct video_device *vdev, int nr) +{ +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = V4L2_CAP_DEVICE_CAPS | V4L2_CAP_VIDEO_CAPTURE | + V4L2_CAP_VIDEO_OUTPUT | V4L2_CAP_READWRITE | + V4L2_CAP_STREAMING; +#endif + + if (debug > 1) + vdev->dev_debug = V4L2_DEV_DEBUG_IOCTL | + V4L2_DEV_DEBUG_IOCTL_ARG; + + vdev->vfl_dir = VFL_DIR_M2M; +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + capture_param->capability = V4L2_CAP_TIMEPERFRAME; /* since 2.16 */ + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = V4L2LOOPBACK_FPS_DEFAULT; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (has_output_token(dev->stream_tokens)) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("sustain_timer_clb() write_pos=%lld reread=%u\n", + (long long)dev->write_position, dev->reread_count); 
+ if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + struct v4l2loopback_private *vdev_priv = NULL; + int err; + + u32 _width = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + u32 _height = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + u32 _min_width = DEFAULT_FROM_CONF(min_width, + < V4L2LOOPBACK_SIZE_MIN_WIDTH, + V4L2LOOPBACK_SIZE_MIN_WIDTH); + u32 _min_height = DEFAULT_FROM_CONF(min_height, + < V4L2LOOPBACK_SIZE_MIN_HEIGHT, + V4L2LOOPBACK_SIZE_MIN_HEIGHT); + u32 _max_width = DEFAULT_FROM_CONF(max_width, < _min_width, max_width); + u32 _max_height = + DEFAULT_FROM_CONF(max_height, < _min_height, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? 
+ (bool)(conf->announce_all_caps) : + !(V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS); + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + struct v4l2_format _fmt; + + int nr = -1; + + if (conf) { + const int output_nr = conf->output_nr; +#ifdef SPLIT_DEVICES + const int capture_nr = conf->capture_nr; +#else + const int capture_nr = output_nr; +#endif + if (capture_nr >= 0 && output_nr == capture_nr) { + nr = output_nr; + } else if (capture_nr < 0 && output_nr < 0) { + nr = -1; + } else if (capture_nr < 0) { + nr = output_nr; + } else if (output_nr < 0) { + nr = capture_nr; + } else { + printk(KERN_ERR + "v4l2-loopback add() split OUTPUT and CAPTURE " + "devices not yet supported.\n"); + printk(KERN_INFO + "v4l2-loopback add() both devices must have the " + "same number (%d != %d).\n", + output_nr, capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + /* initialisation of a new device */ + dprintk("add() creating device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + + /* register new device */ + MARK(); + nr = err; + + if (conf && conf->card_label[0]) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + + /* initialise the _video_ device */ + MARK(); + err = -ENOMEM; + 
dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) + goto out_unregister; + + vdev_priv = kzalloc(sizeof(struct v4l2loopback_private), GFP_KERNEL); + if (vdev_priv == NULL) + goto out_unregister; + + video_set_drvdata(dev->vdev, vdev_priv); + if (video_get_drvdata(dev->vdev) == NULL) + goto out_unregister; + + snprintf(dev->vdev->name, sizeof(dev->vdev->name), "%s", + dev->card_label); + vdev_priv->device_nr = nr; + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + + /* initialise v4l2-loopback specific parameters */ + MARK(); + dev->announce_all_caps = _announce_all_caps; + dev->min_width = _min_width; + dev->min_height = _min_height; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + + /* set (initial) pixel and stream format */ + _width = clamp_val(_width, _min_width, _max_width); + _height = clamp_val(_height, _min_height, _max_height); + _fmt = (struct v4l2_format){ + .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, + .fmt.pix = { .width = _width, + .height = _height, + .pixelformat = formats[0].fourcc, + .colorspace = V4L2_COLORSPACE_DEFAULT, + .field = V4L2_FIELD_NONE } + }; + + err = v4l2l_fill_format(&_fmt, _min_width, _max_width, _min_height, + _max_height); + if (err) + /* highly unexpected failure to assign default format */ + goto out_unregister; + dev->pix_format = _fmt.fmt.pix; + init_capture_param(&dev->capture_param); + set_timeperframe(dev, &dev->capture_param.timeperframe); + + /* ctrls parameters */ + dev->keep_format = 0; + dev->sustain_framerate = 0; + dev->timeout_jiffies = 0; + dev->timeout_image_io = 0; + + /* initialise OUTPUT and CAPTURE buffer values */ + dev->image = NULL; + dev->image_size = 0; + dev->buffer_count = _max_buffers; + dev->buffer_size = 0; + dev->used_buffer_count = 0; + INIT_LIST_HEAD(&dev->outbufs_list); + do { + u32 index; + for (index = 0; index < dev->buffer_count; ++index) + INIT_LIST_HEAD(&dev->buffers[index].list_head); + + } while (0); + 
memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + dev->write_position = 0; + + /* initialise synchronisation data */ + atomic_set(&dev->open_count, 0); + mutex_init(&dev->image_mutex); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->list_lock); + init_waitqueue_head(&dev->read_event); + dev->format_tokens = V4L2L_TOKEN_MASK; + dev->stream_tokens = V4L2L_TOKEN_MASK; + + /* initialise sustain frame rate and timeout parameters, and timers */ + dev->reread_count = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + + /* initialise the control handler and add controls */ + MARK(); + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto out_free_handler; + + /* register the device (creates /dev/video*) */ + MARK(); + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2-loopback add() failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + /* NOTE: ambivalent if sysfs entries fail */ + + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: + video_device_release(dev->vdev); +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + video_set_drvdata(dev->vdev, 
NULL); + if (vdev_priv != NULL) + kfree(vdev_priv); + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + int device_nr = v4l2loopback_get_vdev_nr(dev->vdev); + mutex_lock(&dev->image_mutex); + free_buffers(dev); + free_timeout_buffer(dev); + mutex_unlock(&dev->image_mutex); + v4l2loopback_remove_sysfs(dev->vdev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + idr_remove(&v4l2loopback_index_idr, device_nr); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr, capture_nr, output_nr; + int ret; + const __u32 version = V4L2LOOPBACK_VERSION_CODE; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + case V4L2LOOPBACK_CTL_ADD_legacy: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + case V4L2LOOPBACK_CTL_REMOVE_legacy: + ret = v4l2loopback_lookup((__u32)parm, &dev); + if (ret >= 0 && dev) { + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. 
+ * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + case V4L2LOOPBACK_CTL_QUERY_legacy: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + capture_nr = output_nr = conf.output_nr; +#ifdef SPLIT_DEVICES + capture_nr = conf.capture_nr; +#endif + device_nr = (output_nr < 0) ? capture_nr : output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != capture_nr) && (capture_nr >= 0) && + ((ret = v4l2loopback_lookup(capture_nr, 0)) < 0)) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != output_nr) && (output_nr >= 0) && + ((ret = v4l2loopback_lookup(output_nr, 0)) < 0)) + break; + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + + conf.output_nr = dev->vdev->num; +#ifdef SPLIT_DEVICES + conf.capture_nr = dev->vdev->num; +#endif + conf.min_width = dev->min_width; + conf.min_height = dev->min_height; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffer_count; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case V4L2LOOPBACK_CTL_VERSION: + if (!parm) + break; + if (copy_to_user((void *)parm, &version, sizeof(version))) { + ret = -EFAULT; + break; + } + ret = 0; + break; + } + + 
mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + 
.vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + const u32 min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + const u32 min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2-loopback init() number of initial devices is " + "limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2-loopback init() number of buffers is limited " + "to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2-loopback init() allowing %d openers rather " + "than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < min_width) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO 
"v4l2-loopback init() using max_width %d\n", + max_width); + } + if (max_height < min_height) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2-loopback init() using max_height %d\n", + max_height); + } + + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], +#ifdef SPLIT_DEVICES + .capture_nr = video_nr[i], +#endif + .min_width = min_width, + .min_height = min_height, + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2-loopback driver version %d.%d.%d%s loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff, +#ifdef SNAPSHOT_VERSION + " (" __stringify(SNAPSHOT_VERSION) ")" +#else + "" +#endif + ); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +module_init(v4l2loopback_init_module); +module_exit(v4l2loopback_cleanup_module); diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 00000000000000..e48e0ce5949d6e --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note 
*/ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 15 +#define V4L2LOOPBACK_VERSION_BUGFIX 0 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * NOTE: currently split-devices (where output_nr and capture_nr differ) + * are not implemented yet. + * until then, requesting different device-IDs will result in EINVAL. + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + __s32 output_nr; + __s32 unused; /*capture_nr;*/ + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * allowed frame size + * if too low, default values are used + */ + __u32 min_width; + __u32 max_width; + __u32 min_height; + __u32 max_height; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + __s32 max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + __s32 max_openers; + + /** + * set the debugging level for this device + */ + __s32 debug; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * NOTE: this is going to be removed once separate output/capture + * devices are implemented + */ + __s32 announce_all_caps; +}; + +#define 
V4L2LOOPBACK_CTL_IOCTLMAGIC '~' + +/* a pointer to an (unsigned int) that - on success - will hold + * the version code of the v4l2loopback module + * as returned by KERNEL_VERSION(MAJOR, MINOR, BUGFIX) + */ +#define V4L2LOOPBACK_CTL_VERSION _IOR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 0, __u32) + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. + * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD \ + _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 1, struct v4l2_loopback_config) + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE _IOW(V4L2LOOPBACK_CTL_IOCTLMAGIC, 2, __u32) + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY \ + _IOWR(V4L2LOOPBACK_CTL_IOCTLMAGIC, 3, struct v4l2_loopback_config) + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 00000000000000..d855a379655419 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,445 @@ +static const struct v4l2l_format formats[] = { +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + + /* here come the packed formats */ + { + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, + }, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags 
= 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_ABGR32 + { + .name = "32 bpp RGBA, le", + .fourcc = V4L2_PIX_FMT_ABGR32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGBA32 + { + .name = "32 bpp RGBA", + .fourcc = V4L2_PIX_FMT_RGBA32, + .depth = 32, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef 
V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { 
+ .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + 
.fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name 
= "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ +}; diff --git a/drivers/mfd/exynos-lpass.c b/drivers/mfd/exynos-lpass.c index 6a585173230b13..44797001a4322b 100644 --- a/drivers/mfd/exynos-lpass.c +++ b/drivers/mfd/exynos-lpass.c @@ -104,11 +104,22 @@ static const struct regmap_config exynos_lpass_reg_conf = { .fast_io = true, }; +static void exynos_lpass_disable_lpass(void *data) +{ + struct platform_device *pdev = data; + struct exynos_lpass *lpass = platform_get_drvdata(pdev); + + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + exynos_lpass_disable(lpass); +} + static int exynos_lpass_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct exynos_lpass *lpass; void __iomem *base_top; + int ret; lpass = devm_kzalloc(dev, sizeof(*lpass), GFP_KERNEL); if (!lpass) @@ -122,8 +133,8 @@ static int exynos_lpass_probe(struct platform_device *pdev) if (IS_ERR(lpass->sfr0_clk)) return PTR_ERR(lpass->sfr0_clk); - lpass->top = regmap_init_mmio(dev, base_top, - &exynos_lpass_reg_conf); + lpass->top = devm_regmap_init_mmio(dev, base_top, + &exynos_lpass_reg_conf); if (IS_ERR(lpass->top)) { dev_err(dev, "LPASS top regmap initialization failed\n"); return PTR_ERR(lpass->top); @@ -134,18 +145,11 @@ static int exynos_lpass_probe(struct platform_device *pdev) pm_runtime_enable(dev); exynos_lpass_enable(lpass); - return 
devm_of_platform_populate(dev); -} - -static void exynos_lpass_remove(struct platform_device *pdev) -{ - struct exynos_lpass *lpass = platform_get_drvdata(pdev); + ret = devm_add_action_or_reset(dev, exynos_lpass_disable_lpass, pdev); + if (ret) + return ret; - exynos_lpass_disable(lpass); - pm_runtime_disable(&pdev->dev); - if (!pm_runtime_status_suspended(&pdev->dev)) - exynos_lpass_disable(lpass); - regmap_exit(lpass->top); + return devm_of_platform_populate(dev); } static int __maybe_unused exynos_lpass_suspend(struct device *dev) @@ -185,7 +189,6 @@ static struct platform_driver exynos_lpass_driver = { .of_match_table = exynos_lpass_of_match, }, .probe = exynos_lpass_probe, - .remove = exynos_lpass_remove, }; module_platform_driver(exynos_lpass_driver); diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index 792236f56399af..b9cc85ea2c4019 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -129,7 +129,7 @@ static const struct spi_device_id stmpe_spi_id[] = { { "stmpe2403", STMPE2403 }, { } }; -MODULE_DEVICE_TABLE(spi, stmpe_id); +MODULE_DEVICE_TABLE(spi, stmpe_spi_id); static struct spi_driver stmpe_spi_driver = { .driver = { diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig index bb2fec4b5880bf..56005243a230d5 100644 --- a/drivers/misc/lis3lv02d/Kconfig +++ b/drivers/misc/lis3lv02d/Kconfig @@ -10,7 +10,7 @@ config SENSORS_LIS3_SPI help This driver provides support for the LIS3LV02Dx accelerometer connected via SPI. The accelerometer data is readable via - /sys/devices/platform/lis3lv02d. + /sys/devices/faux/lis3lv02d. This driver also provides an absolute input class device, allowing the laptop to act as a pinball machine-esque joystick. @@ -26,7 +26,7 @@ config SENSORS_LIS3_I2C help This driver provides support for the LIS3LV02Dx accelerometer connected via I2C. The accelerometer data is readable via - /sys/devices/platform/lis3lv02d. + /sys/devices/faux/lis3lv02d. 
This driver also provides an absolute input class device, allowing the device to act as a pinball machine-esque joystick. diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c index 04756302b87805..98d3d123004c88 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gpio.c @@ -37,6 +37,7 @@ struct pci1xxxx_gpio { struct auxiliary_device *aux_dev; void __iomem *reg_base; + raw_spinlock_t wa_lock; struct gpio_chip gpio; spinlock_t lock; int irq_base; @@ -167,7 +168,7 @@ static void pci1xxxx_gpio_irq_ack(struct irq_data *data) unsigned long flags; spin_lock_irqsave(&priv->lock, flags); - pci1xxx_assign_bit(priv->reg_base, INTR_STAT_OFFSET(gpio), (gpio % 32), true); + writel(BIT(gpio % 32), priv->reg_base + INTR_STAT_OFFSET(gpio)); spin_unlock_irqrestore(&priv->lock, flags); } @@ -257,6 +258,7 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) struct pci1xxxx_gpio *priv = dev_id; struct gpio_chip *gc = &priv->gpio; unsigned long int_status = 0; + unsigned long wa_flags; unsigned long flags; u8 pincount; int bit; @@ -280,7 +282,9 @@ static irqreturn_t pci1xxxx_gpio_irq_handler(int irq, void *dev_id) writel(BIT(bit), priv->reg_base + INTR_STATUS_OFFSET(gpiobank)); spin_unlock_irqrestore(&priv->lock, flags); irq = irq_find_mapping(gc->irq.domain, (bit + (gpiobank * 32))); - handle_nested_irq(irq); + raw_spin_lock_irqsave(&priv->wa_lock, wa_flags); + generic_handle_irq(irq); + raw_spin_unlock_irqrestore(&priv->wa_lock, wa_flags); } } spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index a5f88ec97df753..bc40b940ae2145 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -117,6 +117,7 @@ #define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */ +#define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ /* 
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index d6ff9d82ae94b3..3f9c60b579ae48 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -124,6 +124,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, /* required last entry */ diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c index 7be1649b19725c..267d0de5fade83 100644 --- a/drivers/misc/mei/vsc-tp.c +++ b/drivers/misc/mei/vsc-tp.c @@ -36,20 +36,24 @@ #define VSC_TP_XFER_TIMEOUT_BYTES 700 #define VSC_TP_PACKET_PADDING_SIZE 1 #define VSC_TP_PACKET_SIZE(pkt) \ - (sizeof(struct vsc_tp_packet) + le16_to_cpu((pkt)->len) + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + le16_to_cpu((pkt)->hdr.len) + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_PACKET_SIZE \ - (sizeof(struct vsc_tp_packet) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) + (sizeof(struct vsc_tp_packet_hdr) + VSC_TP_MAX_MSG_SIZE + VSC_TP_CRC_SIZE) #define VSC_TP_MAX_XFER_SIZE \ (VSC_TP_MAX_PACKET_SIZE + VSC_TP_XFER_TIMEOUT_BYTES) #define VSC_TP_NEXT_XFER_LEN(len, offset) \ - (len + sizeof(struct vsc_tp_packet) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) + (len + sizeof(struct vsc_tp_packet_hdr) + VSC_TP_CRC_SIZE - offset + VSC_TP_PACKET_PADDING_SIZE) -struct vsc_tp_packet { +struct vsc_tp_packet_hdr { __u8 sync; __u8 cmd; __le16 len; __le32 seq; - __u8 buf[] __counted_by(len); +}; + +struct vsc_tp_packet { + struct vsc_tp_packet_hdr hdr; + __u8 buf[VSC_TP_MAX_XFER_SIZE - sizeof(struct vsc_tp_packet_hdr)]; }; struct vsc_tp { @@ -67,8 +71,8 @@ struct vsc_tp { u32 seq; /* command buffer */ - void *tx_buf; - void *rx_buf; + struct vsc_tp_packet *tx_buf; + struct vsc_tp_packet *rx_buf; atomic_t assert_cnt; wait_queue_head_t xfer_wait; @@ -158,12 +162,12 @@ static int vsc_tp_dev_xfer(struct vsc_tp *tp, void *obuf, void *ibuf, size_t 
len static int vsc_tp_xfer_helper(struct vsc_tp *tp, struct vsc_tp_packet *pkt, void *ibuf, u16 ilen) { - int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet); + int ret, offset = 0, cpy_len, src_len, dst_len = sizeof(struct vsc_tp_packet_hdr); int next_xfer_len = VSC_TP_PACKET_SIZE(pkt) + VSC_TP_XFER_TIMEOUT_BYTES; - u8 *src, *crc_src, *rx_buf = tp->rx_buf; + u8 *src, *crc_src, *rx_buf = (u8 *)tp->rx_buf; int count_down = VSC_TP_MAX_XFER_COUNT; u32 recv_crc = 0, crc = ~0; - struct vsc_tp_packet ack; + struct vsc_tp_packet_hdr ack; u8 *dst = (u8 *)&ack; bool synced = false; @@ -280,10 +284,10 @@ int vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen, guard(mutex)(&tp->mutex); - pkt->sync = VSC_TP_PACKET_SYNC; - pkt->cmd = cmd; - pkt->len = cpu_to_le16(olen); - pkt->seq = cpu_to_le32(++tp->seq); + pkt->hdr.sync = VSC_TP_PACKET_SYNC; + pkt->hdr.cmd = cmd; + pkt->hdr.len = cpu_to_le16(olen); + pkt->hdr.seq = cpu_to_le32(++tp->seq); memcpy(pkt->buf, obuf, olen); crc = ~crc32(~0, (u8 *)pkt, sizeof(pkt) + olen); @@ -320,7 +324,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) guard(mutex)(&tp->mutex); /* rom xfer is big endian */ - cpu_to_be32_array(tp->tx_buf, obuf, words); + cpu_to_be32_array((__be32 *)tp->tx_buf, obuf, words); ret = read_poll_timeout(gpiod_get_value_cansleep, ret, !ret, VSC_TP_ROM_XFER_POLL_DELAY_US, @@ -336,7 +340,7 @@ int vsc_tp_rom_xfer(struct vsc_tp *tp, const void *obuf, void *ibuf, size_t len) return ret; if (ibuf) - be32_to_cpu_array(ibuf, tp->rx_buf, words); + be32_to_cpu_array(ibuf, (__be32 *)tp->rx_buf, words); return ret; } @@ -490,11 +494,11 @@ static int vsc_tp_probe(struct spi_device *spi) if (!tp) return -ENOMEM; - tp->tx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->tx_buf = devm_kzalloc(dev, sizeof(*tp->tx_buf), GFP_KERNEL); if (!tp->tx_buf) return -ENOMEM; - tp->rx_buf = devm_kzalloc(dev, VSC_TP_MAX_XFER_SIZE, GFP_KERNEL); + tp->rx_buf = 
devm_kzalloc(dev, sizeof(*tp->rx_buf), GFP_KERNEL); if (!tp->rx_buf) return -ENOMEM; diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index d294850a35a12c..c4e5e2c977be27 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -122,7 +122,6 @@ struct pci_endpoint_test { struct pci_endpoint_test_data { enum pci_barno test_reg_bar; size_t alignment; - int irq_type; }; static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, @@ -948,7 +947,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, test_reg_bar = data->test_reg_bar; test->test_reg_bar = test_reg_bar; test->alignment = data->alignment; - test->irq_type = data->irq_type; } init_completion(&test->irq_raised); @@ -970,10 +968,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_set_master(pdev); - ret = pci_endpoint_test_alloc_irq_vectors(test, test->irq_type); - if (ret) - goto err_disable_irq; - for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { base = pci_ioremap_bar(pdev, bar); @@ -1009,10 +1003,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, goto err_ida_remove; } - ret = pci_endpoint_test_request_irq(test); - if (ret) - goto err_kfree_test_name; - pci_endpoint_test_get_capabilities(test); misc_device = &test->miscdev; @@ -1020,7 +1010,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, misc_device->name = kstrdup(name, GFP_KERNEL); if (!misc_device->name) { ret = -ENOMEM; - goto err_release_irq; + goto err_kfree_test_name; } misc_device->parent = &pdev->dev; misc_device->fops = &pci_endpoint_test_fops; @@ -1036,9 +1026,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, err_kfree_name: kfree(misc_device->name); -err_release_irq: - pci_endpoint_test_release_irq(test); - err_kfree_test_name: kfree(test->name); @@ -1051,8 +1038,6 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, pci_iounmap(pdev, test->bar[bar]); } 
-err_disable_irq: - pci_endpoint_test_free_irq_vectors(test); pci_release_regions(pdev); err_disable_pdev: @@ -1092,23 +1077,19 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) static const struct pci_endpoint_test_data default_data = { .test_reg_bar = BAR_0, .alignment = SZ_4K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data am654_data = { .test_reg_bar = BAR_2, .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data j721e_data = { .alignment = 256, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; static const struct pci_endpoint_test_data rk3588_data = { .alignment = SZ_64K, - .irq_type = PCITEST_IRQ_TYPE_MSI, }; /* diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index abe79f6fd2a79b..b64944367ac533 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -227,6 +227,7 @@ static int drv_cp_harray_to_user(void __user *user_buf_uva, static int vmci_host_setup_notify(struct vmci_ctx *context, unsigned long uva) { + struct page *page; int retval; if (context->notify_page) { @@ -243,13 +244,11 @@ static int vmci_host_setup_notify(struct vmci_ctx *context, /* * Lock physical page backing a given user VA. */ - retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &context->notify_page); - if (retval != 1) { - context->notify_page = NULL; + retval = get_user_pages_fast(uva, 1, FOLL_WRITE, &page); + if (retval != 1) return VMCI_ERROR_GENERIC; - } - if (context->notify_page == NULL) - return VMCI_ERROR_UNAVAILABLE; + + context->notify_page = page; /* * Map the locked page and set up notify pointer. 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 6824131b69b188..264e11fa58eafb 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -691,8 +691,8 @@ config MMC_TMIO_CORE config MMC_SDHI tristate "Renesas SDHI SD/SDIO controller support" depends on SUPERH || ARCH_RENESAS || COMPILE_TEST + depends on (RESET_CONTROLLER && REGULATOR) || !OF select MMC_TMIO_CORE - select RESET_CONTROLLER if ARCH_RENESAS help This provides support for the SDHI SD/SDIO controller found in Renesas SuperH, ARM and ARM64 based SoCs diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index fa6526be36381d..8c83e203c51670 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -1179,7 +1179,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(rdev)) { dev_err(dev, "regulator register failed err=%ld", PTR_ERR(rdev)); ret = PTR_ERR(rdev); - goto efree; + goto edisclk; } priv->rdev = rdev; } @@ -1243,26 +1243,26 @@ int renesas_sdhi_probe(struct platform_device *pdev, num_irqs = platform_irq_count(pdev); if (num_irqs < 0) { ret = num_irqs; - goto eirq; + goto edisclk; } /* There must be at least one IRQ source */ if (!num_irqs) { ret = -ENXIO; - goto eirq; + goto edisclk; } for (i = 0; i < num_irqs; i++) { irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - goto eirq; + goto edisclk; } ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0, dev_name(&pdev->dev), host); if (ret) - goto eirq; + goto edisclk; } ret = tmio_mmc_host_probe(host); @@ -1274,8 +1274,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, return ret; -eirq: - tmio_mmc_host_remove(host); edisclk: renesas_sdhi_clk_disable(host); efree: diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 09b9ab15e4995f..a20d03fdd6a93e 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -17,6 +17,7 @@ #include #include 
#include +#include #include #include #include @@ -745,6 +746,29 @@ static void dwcmshc_rk35xx_postinit(struct sdhci_host *host, struct dwcmshc_priv } } +static void dwcmshc_rk3576_postinit(struct sdhci_host *host, struct dwcmshc_priv *dwc_priv) +{ + struct device *dev = mmc_dev(host->mmc); + int ret; + + /* + * This works around the design of the RK3576's power domains, which + * makes the PD_NVM power domain, which the sdhci controller on the + * RK3576 is in, never come back the same way once it's run-time + * suspended once. This can happen during early kernel boot if no driver + * is using either PD_NVM or its child power domain PD_SDGMAC for a + * short moment, leading to it being turned off to save power. By + * keeping it on, sdhci suspending won't lead to PD_NVM becoming a + * candidate for getting turned off. + */ + ret = dev_pm_genpd_rpm_always_on(dev, true); + if (ret && ret != -EOPNOTSUPP) + dev_warn(dev, "failed to set PD rpm always on, SoC may hang later: %pe\n", + ERR_PTR(ret)); + + dwcmshc_rk35xx_postinit(host, dwc_priv); +} + static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -1176,6 +1200,18 @@ static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk35xx_pdata = { .postinit = dwcmshc_rk35xx_postinit, }; +static const struct dwcmshc_pltfm_data sdhci_dwcmshc_rk3576_pdata = { + .pdata = { + .ops = &sdhci_dwcmshc_rk35xx_ops, + .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN | + SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | + SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN, + }, + .init = dwcmshc_rk35xx_init, + .postinit = dwcmshc_rk3576_postinit, +}; + static const struct dwcmshc_pltfm_data sdhci_dwcmshc_th1520_pdata = { .pdata = { .ops = &sdhci_dwcmshc_th1520_ops, @@ -1274,6 +1310,10 @@ static const struct of_device_id sdhci_dwcmshc_dt_ids[] = { .compatible = "rockchip,rk3588-dwcmshc", .data = &sdhci_dwcmshc_rk35xx_pdata, }, + { + .compatible = 
"rockchip,rk3576-dwcmshc", + .data = &sdhci_dwcmshc_rk3576_pdata, + }, { .compatible = "rockchip,rk3568-dwcmshc", .data = &sdhci_dwcmshc_rk35xx_pdata, diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index f75c31815ab00d..73385ff4c0f30b 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -155,6 +155,7 @@ struct sdhci_am654_data { u32 tuning_loop; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) +#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) }; struct window { @@ -166,6 +167,7 @@ struct window { struct sdhci_am654_driver_data { const struct sdhci_pltfm_data *pdata; u32 flags; + u32 quirks; #define IOMUX_PRESENT (1 << 0) #define FREQSEL_2_BIT (1 << 1) #define STRBSEL_4_BIT (1 << 2) @@ -356,6 +358,29 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, sdhci_set_clock(host, clock); } +static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); + int ret; + + if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) && + ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = mmc_regulator_set_vqmmc(mmc, ios); + if (ret < 0) { + pr_err("%s: Switching to 1.8V signalling voltage failed,\n", + mmc_hostname(mmc)); + return -EIO; + } + } + return 0; + } + + return sdhci_start_signal_voltage_switch(mmc, ios); +} + static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg) { writeb(val, host->ioaddr + reg); @@ -650,6 +675,12 @@ static const struct sdhci_am654_driver_data sdhci_j721e_4bit_drvdata = { .flags = IOMUX_PRESENT, }; +static const struct sdhci_am654_driver_data sdhci_am62_4bit_drvdata = { + .pdata = &sdhci_j721e_4bit_pdata, + .flags = IOMUX_PRESENT, + .quirks = SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA, +}; + static const struct 
soc_device_attribute sdhci_am654_devices[] = { { .family = "AM65X", .revision = "SR1.0", @@ -872,7 +903,7 @@ static const struct of_device_id sdhci_am654_of_match[] = { }, { .compatible = "ti,am62-sdhci", - .data = &sdhci_j721e_4bit_drvdata, + .data = &sdhci_am62_4bit_drvdata, }, { /* sentinel */ } }; @@ -906,6 +937,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); sdhci_am654 = sdhci_pltfm_priv(pltfm_host); sdhci_am654->flags = drvdata->flags; + sdhci_am654->quirks = drvdata->quirks; clk_xin = devm_clk_get(dev, "clk_xin"); if (IS_ERR(clk_xin)) { @@ -940,6 +972,7 @@ static int sdhci_am654_probe(struct platform_device *pdev) goto err_pltfm_free; } + host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; pm_runtime_get_noresume(dev); diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c index 9739387cff8c91..58c6e1743f5c65 100644 --- a/drivers/mtd/inftlcore.c +++ b/drivers/mtd/inftlcore.c @@ -482,10 +482,11 @@ static inline u16 INFTL_findwriteunit(struct INFTLrecord *inftl, unsigned block) silly = MAX_LOOPS; while (thisEUN <= inftl->lastEUN) { - inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) + - blockofs, 8, &retlen, (char *)&bci); - - status = bci.Status | bci.Status1; + if (inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) + + blockofs, 8, &retlen, (char *)&bci) < 0) + status = SECTOR_IGNORE; + else + status = bci.Status | bci.Status1; pr_debug("INFTL: status of block %d in EUN %d is %x\n", block , writeEUN, status); diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index db516a45f0c526..44913ff1bf12cc 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -3,11 +3,8 @@ nandcore-objs := core.o bbt.o obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o obj-$(CONFIG_MTD_NAND_ECC_MEDIATEK) += ecc-mtk.o -ifeq ($(CONFIG_SPI_QPIC_SNAND),y) obj-$(CONFIG_SPI_QPIC_SNAND) += qpic_common.o -else 
obj-$(CONFIG_MTD_NAND_QCOM) += qpic_common.o -endif obj-y += onenand/ obj-y += raw/ obj-y += spi/ diff --git a/drivers/mtd/nand/ecc-mxic.c b/drivers/mtd/nand/ecc-mxic.c index 56b56f726b9983..1bf9a5a64b87a4 100644 --- a/drivers/mtd/nand/ecc-mxic.c +++ b/drivers/mtd/nand/ecc-mxic.c @@ -614,7 +614,7 @@ static int mxic_ecc_finish_io_req_external(struct nand_device *nand, { struct mxic_ecc_engine *mxic = nand_to_mxic(nand); struct mxic_ecc_ctx *ctx = nand_to_ecc_ctx(nand); - int nents, step, ret; + int nents, step, ret = 0; if (req->mode == MTD_OPS_RAW) return 0; diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c index b07c2f8b40350d..918974d088cf65 100644 --- a/drivers/mtd/nand/raw/r852.c +++ b/drivers/mtd/nand/raw/r852.c @@ -387,6 +387,9 @@ static int r852_wait(struct nand_chip *chip) static int r852_ready(struct nand_chip *chip) { struct r852_device *dev = r852_get_dev(nand_to_mtd(chip)); + if (dev->card_unstable) + return 0; + return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 950d8e4d86f8b4..17ae4b819a5977 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -453,13 +453,14 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs) /** * bond_ipsec_add_sa - program device with a security association + * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int bond_ipsec_add_sa(struct xfrm_state *xs, +static int bond_ipsec_add_sa(struct net_device *bond_dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; netdevice_tracker tracker; struct bond_ipsec *ipsec; @@ -495,9 +496,9 @@ static int bond_ipsec_add_sa(struct xfrm_state *xs, goto out; } - xs->xso.real_dev = real_dev; - err = real_dev->xfrmdev_ops->xdo_dev_state_add(xs, extack); + 
err = real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, xs, extack); if (!err) { + xs->xso.real_dev = real_dev; ipsec->xs = xs; INIT_LIST_HEAD(&ipsec->list); mutex_lock(&bond->ipsec_lock); @@ -539,11 +540,25 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) if (ipsec->xs->xso.real_dev == real_dev) continue; - ipsec->xs->xso.real_dev = real_dev; - if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) { + if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev, + ipsec->xs, NULL)) { slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); - ipsec->xs->xso.real_dev = NULL; + continue; } + + spin_lock_bh(&ipsec->xs->lock); + /* xs might have been killed by the user during the migration + * to the new dev, but bond_ipsec_del_sa() should have done + * nothing, as xso.real_dev is NULL. + * Delete it from the device we just added it to. The pending + * bond_ipsec_free_sa() call will do the rest of the cleanup. + */ + if (ipsec->xs->km.state == XFRM_STATE_DEAD && + real_dev->xfrmdev_ops->xdo_dev_state_delete) + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, + ipsec->xs); + ipsec->xs->xso.real_dev = real_dev; + spin_unlock_bh(&ipsec->xs->lock); } out: mutex_unlock(&bond->ipsec_lock); @@ -551,54 +566,27 @@ static void bond_ipsec_add_sa_all(struct bonding *bond) /** * bond_ipsec_del_sa - clear out this specific SA + * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct **/ -static void bond_ipsec_del_sa(struct xfrm_state *xs) +static void bond_ipsec_del_sa(struct net_device *bond_dev, + struct xfrm_state *xs) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; - netdevice_tracker tracker; - struct bond_ipsec *ipsec; - struct bonding *bond; - struct slave *slave; - if (!bond_dev) + if (!bond_dev || !xs->xso.real_dev) return; - rcu_read_lock(); - bond = netdev_priv(bond_dev); - slave = rcu_dereference(bond->curr_active_slave); - real_dev = slave ? 
slave->dev : NULL; - netdev_hold(real_dev, &tracker, GFP_ATOMIC); - rcu_read_unlock(); - - if (!slave) - goto out; - - if (!xs->xso.real_dev) - goto out; - - WARN_ON(xs->xso.real_dev != real_dev); + real_dev = xs->xso.real_dev; if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_state_delete || netif_is_bond_master(real_dev)) { slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); - goto out; + return; } - real_dev->xfrmdev_ops->xdo_dev_state_delete(xs); -out: - netdev_put(real_dev, &tracker); - mutex_lock(&bond->ipsec_lock); - list_for_each_entry(ipsec, &bond->ipsec_list, list) { - if (ipsec->xs == xs) { - list_del(&ipsec->list); - kfree(ipsec); - break; - } - } - mutex_unlock(&bond->ipsec_lock); + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs); } static void bond_ipsec_del_sa_all(struct bonding *bond) @@ -624,46 +612,55 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); - } else { - real_dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); - if (real_dev->xfrmdev_ops->xdo_dev_state_free) - real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs); + continue; } + + spin_lock_bh(&ipsec->xs->lock); + ipsec->xs->xso.real_dev = NULL; + /* Don't double delete states killed by the user. 
*/ + if (ipsec->xs->km.state != XFRM_STATE_DEAD) + real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, + ipsec->xs); + spin_unlock_bh(&ipsec->xs->lock); + + if (real_dev->xfrmdev_ops->xdo_dev_state_free) + real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, + ipsec->xs); } mutex_unlock(&bond->ipsec_lock); } -static void bond_ipsec_free_sa(struct xfrm_state *xs) +static void bond_ipsec_free_sa(struct net_device *bond_dev, + struct xfrm_state *xs) { - struct net_device *bond_dev = xs->xso.dev; struct net_device *real_dev; - netdevice_tracker tracker; + struct bond_ipsec *ipsec; struct bonding *bond; - struct slave *slave; if (!bond_dev) return; - rcu_read_lock(); bond = netdev_priv(bond_dev); - slave = rcu_dereference(bond->curr_active_slave); - real_dev = slave ? slave->dev : NULL; - netdev_hold(real_dev, &tracker, GFP_ATOMIC); - rcu_read_unlock(); - - if (!slave) - goto out; + mutex_lock(&bond->ipsec_lock); if (!xs->xso.real_dev) goto out; - WARN_ON(xs->xso.real_dev != real_dev); + real_dev = xs->xso.real_dev; - if (real_dev && real_dev->xfrmdev_ops && + xs->xso.real_dev = NULL; + if (real_dev->xfrmdev_ops && real_dev->xfrmdev_ops->xdo_dev_state_free) - real_dev->xfrmdev_ops->xdo_dev_state_free(xs); + real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs); out: - netdev_put(real_dev, &tracker); + list_for_each_entry(ipsec, &bond->ipsec_list, list) { + if (ipsec->xs == xs) { + list_del(&ipsec->list); + kfree(ipsec); + break; + } + } + mutex_unlock(&bond->ipsec_lock); } /** @@ -850,8 +847,9 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct ifreq ifr; struct mii_ioctl_data *mii; + struct ifreq ifr; + int ret; if (!reporting && !netif_running(slave_dev)) return 0; @@ -860,9 +858,13 @@ static int bond_check_dev_link(struct bonding *bond, return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; /* Try to get link status using Ethtool first. 
*/ - if (slave_dev->ethtool_ops->get_link) - return slave_dev->ethtool_ops->get_link(slave_dev) ? - BMSR_LSTATUS : 0; + if (slave_dev->ethtool_ops->get_link) { + netdev_lock_ops(slave_dev); + ret = slave_dev->ethtool_ops->get_link(slave_dev); + netdev_unlock_ops(slave_dev); + + return ret ? BMSR_LSTATUS : 0; + } /* Ethtool can't be used, fallback to MII ioctls. */ if (slave_ops->ndo_eth_ioctl) { @@ -2113,15 +2115,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * set the master's mac address to that of the first slave */ memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len); - ss.ss_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, - extack); - if (res) { - slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); - goto err_restore_mtu; - } + } else if (bond->params.fail_over_mac == BOND_FOM_FOLLOW && + BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && + memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) { + /* Set slave to random address to avoid duplicate mac + * address in later fail over. 
+ */ + eth_random_addr(ss.__data); + } else { + goto skip_mac_set; } + ss.ss_family = slave_dev->type; + res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss, extack); + if (res) { + slave_err(bond_dev, slave_dev, "Error %d calling set_mac_address\n", res); + goto err_restore_mtu; + } + +skip_mac_set: + /* set no_addrconf flag before open to prevent IPv6 addrconf */ slave_dev->priv_flags |= IFF_NO_ADDRCONF; diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index cf0d5180527232..f6921368cd14e9 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -16,6 +16,7 @@ #include #include #include +#include MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Kvaser AB "); @@ -410,10 +411,13 @@ struct kvaser_pciefd_can { void __iomem *reg_base; struct can_berr_counter bec; u8 cmd_seq; + u8 tx_max_count; + u8 tx_idx; + u8 ack_idx; int err_rep_cnt; - int echo_idx; + unsigned int completed_tx_pkts; + unsigned int completed_tx_bytes; spinlock_t lock; /* Locks sensitive registers (e.g. 
MODE) */ - spinlock_t echo_lock; /* Locks the message echo buffer */ struct timer_list bec_poll_timer; struct completion start_comp, flush_comp; }; @@ -714,6 +718,9 @@ static int kvaser_pciefd_open(struct net_device *netdev) int ret; struct kvaser_pciefd_can *can = netdev_priv(netdev); + can->tx_idx = 0; + can->ack_idx = 0; + ret = open_candev(netdev); if (ret) return ret; @@ -745,21 +752,26 @@ static int kvaser_pciefd_stop(struct net_device *netdev) timer_delete(&can->bec_poll_timer); } can->can.state = CAN_STATE_STOPPED; + netdev_reset_queue(netdev); close_candev(netdev); return ret; } +static unsigned int kvaser_pciefd_tx_avail(const struct kvaser_pciefd_can *can) +{ + return can->tx_max_count - (READ_ONCE(can->tx_idx) - READ_ONCE(can->ack_idx)); +} + static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, - struct kvaser_pciefd_can *can, + struct can_priv *can, u8 seq, struct sk_buff *skb) { struct canfd_frame *cf = (struct canfd_frame *)skb->data; int packet_size; - int seq = can->echo_idx; memset(p, 0, sizeof(*p)); - if (can->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) + if (can->ctrlmode & CAN_CTRLMODE_ONE_SHOT) p->header[1] |= KVASER_PCIEFD_TPACKET_SMS; if (cf->can_id & CAN_RTR_FLAG) @@ -782,7 +794,7 @@ static int kvaser_pciefd_prepare_tx_packet(struct kvaser_pciefd_tx_packet *p, } else { p->header[1] |= FIELD_PREP(KVASER_PCIEFD_RPACKET_DLC_MASK, - can_get_cc_dlc((struct can_frame *)cf, can->can.ctrlmode)); + can_get_cc_dlc((struct can_frame *)cf, can->ctrlmode)); } p->header[1] |= FIELD_PREP(KVASER_PCIEFD_PACKET_SEQ_MASK, seq); @@ -797,22 +809,24 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct kvaser_pciefd_can *can = netdev_priv(netdev); - unsigned long irq_flags; struct kvaser_pciefd_tx_packet packet; + unsigned int seq = can->tx_idx & (can->can.echo_skb_max - 1); + unsigned int frame_len; int nr_words; - u8 count; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; + if 
(!netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1)) + return NETDEV_TX_BUSY; - nr_words = kvaser_pciefd_prepare_tx_packet(&packet, can, skb); + nr_words = kvaser_pciefd_prepare_tx_packet(&packet, &can->can, seq, skb); - spin_lock_irqsave(&can->echo_lock, irq_flags); /* Prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, netdev, can->echo_idx, 0); - - /* Move echo index to the next slot */ - can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max; + WRITE_ONCE(can->can.echo_skb[seq], NULL); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, netdev, seq, frame_len); + netdev_sent_queue(netdev, frame_len); + WRITE_ONCE(can->tx_idx, can->tx_idx + 1); /* Write header to fifo */ iowrite32(packet.header[0], @@ -836,14 +850,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, KVASER_PCIEFD_KCAN_FIFO_LAST_REG); } - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); - /* No room for a new message, stop the queue until at least one - * successful transmit - */ - if (count >= can->can.echo_skb_max || can->can.echo_skb[can->echo_idx]) - netif_stop_queue(netdev); - spin_unlock_irqrestore(&can->echo_lock, irq_flags); + netif_subqueue_maybe_stop(netdev, 0, kvaser_pciefd_tx_avail(can), 1, 1); return NETDEV_TX_OK; } @@ -970,6 +977,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) can->kv_pcie = pcie; can->cmd_seq = 0; can->err_rep_cnt = 0; + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; can->bec.txerr = 0; can->bec.rxerr = 0; @@ -983,11 +992,10 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) tx_nr_packets_max = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_MAX_MASK, ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + can->tx_max_count = min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); can->can.clock.freq = pcie->freq; - can->can.echo_skb_max = 
min(KVASER_PCIEFD_CAN_TX_MAX_COUNT, tx_nr_packets_max - 1); - can->echo_idx = 0; - spin_lock_init(&can->echo_lock); + can->can.echo_skb_max = roundup_pow_of_two(can->tx_max_count); spin_lock_init(&can->lock); can->can.bittiming_const = &kvaser_pciefd_bittiming_const; @@ -1201,7 +1209,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_canfd_skb(priv->dev, &cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } cf->len = can_fd_dlc2len(dlc); @@ -1213,7 +1221,7 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, skb = alloc_can_skb(priv->dev, (struct can_frame **)&cf); if (!skb) { priv->dev->stats.rx_dropped++; - return -ENOMEM; + return 0; } can_frame_set_cc_len((struct can_frame *)cf, dlc, priv->ctrlmode); } @@ -1231,7 +1239,9 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie, priv->dev->stats.rx_packets++; kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - return netif_rx(skb); + netif_rx(skb); + + return 0; } static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can, @@ -1510,19 +1520,21 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie, netdev_dbg(can->can.dev, "Packet was flushed\n"); } else { int echo_idx = FIELD_GET(KVASER_PCIEFD_PACKET_SEQ_MASK, p->header[0]); - int len; - u8 count; + unsigned int len, frame_len = 0; struct sk_buff *skb; + if (echo_idx != (can->ack_idx & (can->can.echo_skb_max - 1))) + return 0; skb = can->can.echo_skb[echo_idx]; - if (skb) - kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); - len = can_get_echo_skb(can->can.dev, echo_idx, NULL); - count = FIELD_GET(KVASER_PCIEFD_KCAN_TX_NR_PACKETS_CURRENT_MASK, - ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NR_PACKETS_REG)); + if (!skb) + return 0; + kvaser_pciefd_set_skb_timestamp(pcie, skb, p->timestamp); + len = can_get_echo_skb(can->can.dev, echo_idx, &frame_len); - if (count < can->can.echo_skb_max && 
netif_queue_stopped(can->can.dev)) - netif_wake_queue(can->can.dev); + /* Pairs with barrier in kvaser_pciefd_start_xmit() */ + smp_store_release(&can->ack_idx, can->ack_idx + 1); + can->completed_tx_pkts++; + can->completed_tx_bytes += frame_len; if (!one_shot_fail) { can->can.dev->stats.tx_bytes += len; @@ -1638,32 +1650,51 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) { int pos = 0; int res = 0; + unsigned int i; do { res = kvaser_pciefd_read_packet(pcie, &pos, dma_buf); } while (!res && pos > 0 && pos < KVASER_PCIEFD_DMA_SIZE); + /* Report ACKs in this buffer to BQL en masse for correct periods */ + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; + + if (!can->completed_tx_pkts) + continue; + netif_subqueue_completed_wake(can->can.dev, 0, + can->completed_tx_pkts, + can->completed_tx_bytes, + kvaser_pciefd_tx_avail(can), 1); + can->completed_tx_pkts = 0; + can->completed_tx_bytes = 0; + } + return res; } -static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) +static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) { + void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG; u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); + + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { kvaser_pciefd_read_buffer(pcie, 0); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */ + } - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { kvaser_pciefd_read_buffer(pcie, 1); + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */ + } if (unlikely(irq & KVASER_PCIEFD_SRB_IRQ_DOF0 || irq & KVASER_PCIEFD_SRB_IRQ_DOF1 || irq & KVASER_PCIEFD_SRB_IRQ_DUF0 || irq & KVASER_PCIEFD_SRB_IRQ_DUF1)) dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq); - - iowrite32(irq, 
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - return irq; } static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can) @@ -1691,29 +1722,22 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); - u32 srb_irq = 0; - u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) return IRQ_NONE; + iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + if (pci_irq & irq_mask->kcan_rx0) - srb_irq = kvaser_pciefd_receive_irq(pcie); + kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) - srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; - - if (srb_release) - iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); return IRQ_HANDLED; } @@ -1733,13 +1757,22 @@ static void kvaser_pciefd_teardown_can_ctrls(struct kvaser_pciefd *pcie) } } +static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie) +{ + unsigned int i; + + /* Masking PCI_IRQ is insufficient as running ISR will unmask it */ + iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); + for (i = 0; i < pcie->nr_channels; ++i) + iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); +} + static int kvaser_pciefd_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int ret; struct kvaser_pciefd *pcie; const struct kvaser_pciefd_irq_mask *irq_mask; - void __iomem *irq_en_base; pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL); if (!pcie) @@ -1805,8 +1838,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, 
KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG); /* Enable PCI interrupts */ - irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie); - iowrite32(irq_mask->all, irq_en_base); + iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); /* Ready the DMA buffers */ iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); @@ -1820,8 +1852,7 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return 0; err_free_irq: - /* Disable PCI interrupts */ - iowrite32(0, irq_en_base); + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); err_pci_free_irq_vectors: @@ -1844,35 +1875,26 @@ static int kvaser_pciefd_probe(struct pci_dev *pdev, return ret; } -static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie) -{ - int i; - - for (i = 0; i < pcie->nr_channels; i++) { - struct kvaser_pciefd_can *can = pcie->can[i]; - - if (can) { - iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG); - unregister_candev(can->can.dev); - timer_delete(&can->bec_poll_timer); - kvaser_pciefd_pwm_stop(can); - free_candev(can->can.dev); - } - } -} - static void kvaser_pciefd_remove(struct pci_dev *pdev) { struct kvaser_pciefd *pcie = pci_get_drvdata(pdev); + unsigned int i; - kvaser_pciefd_remove_all_ctrls(pcie); + for (i = 0; i < pcie->nr_channels; ++i) { + struct kvaser_pciefd_can *can = pcie->can[i]; - /* Disable interrupts */ - iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG); - iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie)); + unregister_candev(can->can.dev); + timer_delete(&can->bec_poll_timer); + kvaser_pciefd_pwm_stop(can); + } + kvaser_pciefd_disable_irq_srcs(pcie); free_irq(pcie->pci->irq, pcie); pci_free_irq_vectors(pcie->pci); + + for (i = 0; i < pcie->nr_channels; ++i) + free_candev(pcie->can[i]->can.dev); + pci_iounmap(pdev, pcie->reg_base); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 
884a6352c42b7b..c2c116ce1087c0 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2379,6 +2379,7 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev, SET_NETDEV_DEV(net_dev, dev); m_can_of_parse_mram(class_dev, mram_config_vals); + spin_lock_init(&class_dev->tx_handling_spinlock); out: return class_dev; } @@ -2462,9 +2463,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 46201c126703ce..c3fb3176ce4221 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -902,15 +902,16 @@ static int rkcanfd_probe(struct platform_device *pdev) priv->can.data_bittiming_const = &rkcanfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_BERR_REPORTING; - if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) - priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.do_set_mode = rkcanfd_set_mode; priv->can.do_get_berr_counter = rkcanfd_get_berr_counter; priv->ndev = ndev; match = device_get_match_data(&pdev->dev); - if (match) + if (match) { priv->devtype_data = *(struct rkcanfd_devtype_data *)match; + if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) + priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; + } err = can_rx_offload_add_manual(ndev, &priv->offload, RKCANFD_NAPI_WEIGHT); @@ -936,8 +937,8 @@ static void rkcanfd_remove(struct platform_device *pdev) struct rkcanfd_priv *priv = platform_get_drvdata(pdev); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); rkcanfd_unregister(priv); + can_rx_offload_del(&priv->offload); free_candev(ndev); } diff --git 
a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 24c6622d36bd85..58ff2ec1d9757e 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -71,12 +71,21 @@ MODULE_AUTHOR("Dario Binacchi "); #define SLCAN_CMD_LEN 1 #define SLCAN_SFF_ID_LEN 3 #define SLCAN_EFF_ID_LEN 8 +#define SLCAN_DATA_LENGTH_LEN 1 +#define SLCAN_ERROR_LEN 1 #define SLCAN_STATE_LEN 1 #define SLCAN_STATE_BE_RXCNT_LEN 3 #define SLCAN_STATE_BE_TXCNT_LEN 3 -#define SLCAN_STATE_FRAME_LEN (1 + SLCAN_CMD_LEN + \ - SLCAN_STATE_BE_RXCNT_LEN + \ - SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_STATE_MSG_LEN (SLCAN_CMD_LEN + \ + SLCAN_STATE_LEN + \ + SLCAN_STATE_BE_RXCNT_LEN + \ + SLCAN_STATE_BE_TXCNT_LEN) +#define SLCAN_ERROR_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_ERROR_LEN + \ + SLCAN_DATA_LENGTH_LEN) +#define SLCAN_FRAME_MSG_LEN_MIN (SLCAN_CMD_LEN + \ + SLCAN_SFF_ID_LEN + \ + SLCAN_DATA_LENGTH_LEN) struct slcan { struct can_priv can; @@ -176,6 +185,9 @@ static void slcan_bump_frame(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; + if (sl->rcount < SLCAN_FRAME_MSG_LEN_MIN) + return; + skb = alloc_can_skb(sl->dev, &cf); if (unlikely(!skb)) { sl->dev->stats.rx_dropped++; @@ -281,7 +293,7 @@ static void slcan_bump_state(struct slcan *sl) return; } - if (state == sl->can.state || sl->rcount < SLCAN_STATE_FRAME_LEN) + if (state == sl->can.state || sl->rcount != SLCAN_STATE_MSG_LEN) return; cmd += SLCAN_STATE_BE_RXCNT_LEN + SLCAN_CMD_LEN + 1; @@ -328,6 +340,9 @@ static void slcan_bump_err(struct slcan *sl) bool rx_errors = false, tx_errors = false, rx_over_errors = false; int i, len; + if (sl->rcount < SLCAN_ERROR_MSG_LEN_MIN) + return; + /* get len from sanitized ASCII value */ len = cmd[1]; if (len >= '0' && len < '9') @@ -456,8 +471,7 @@ static void slcan_bump(struct slcan *sl) static void slcan_unesc(struct slcan *sl, unsigned char s) { if ((s == '\r') || (s == '\a')) { /* CR or BEL ends the pdu */ - if (!test_and_clear_bit(SLF_ERROR, &sl->flags) 
&& - sl->rcount > 4) + if (!test_and_clear_bit(SLF_ERROR, &sl->flags)) slcan_bump(sl); sl->rcount = 0; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3bc56517fe7a99..c30b04f8fc0df8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -75,6 +75,24 @@ static const struct can_bittiming_const mcp251xfd_data_bittiming_const = { .brp_inc = 1, }; +/* The datasheet of the mcp2518fd (DS20006027B) specifies a range of + * [-64,63] for TDCO, indicating a relative TDCO. + * + * Manual tests have shown, that using a relative TDCO configuration + * results in bus off, while an absolute configuration works. + * + * For TDCO use the max value (63) from the data sheet, but 0 as the + * minimum. + */ +static const struct can_tdc_const mcp251xfd_tdc_const = { + .tdcv_min = 0, + .tdcv_max = 63, + .tdco_min = 0, + .tdco_max = 63, + .tdcf_min = 0, + .tdcf_max = 0, +}; + static const char *__mcp251xfd_get_model_str(enum mcp251xfd_model model) { switch (model) { @@ -510,8 +528,7 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) { const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u32 val = 0; - s8 tdco; + u32 tdcmod, val = 0; int err; /* CAN Control Register @@ -575,11 +592,16 @@ static int mcp251xfd_set_bittiming(const struct mcp251xfd_priv *priv) return err; /* Transmitter Delay Compensation */ - tdco = clamp_t(int, dbt->brp * (dbt->prop_seg + dbt->phase_seg1), - -64, 63); - val = FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, - MCP251XFD_REG_TDC_TDCMOD_AUTO) | - FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, tdco); + if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_AUTO) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_AUTO; + else if (priv->can.ctrlmode & CAN_CTRLMODE_TDC_MANUAL) + tdcmod = MCP251XFD_REG_TDC_TDCMOD_MANUAL; + else + tdcmod = MCP251XFD_REG_TDC_TDCMOD_DISABLED; + + val = 
FIELD_PREP(MCP251XFD_REG_TDC_TDCMOD_MASK, tdcmod) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCV_MASK, priv->can.tdc.tdcv) | + FIELD_PREP(MCP251XFD_REG_TDC_TDCO_MASK, priv->can.tdc.tdco); return regmap_write(priv->map_reg, MCP251XFD_REG_TDC, val); } @@ -2083,10 +2105,12 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.do_get_berr_counter = mcp251xfd_get_berr_counter; priv->can.bittiming_const = &mcp251xfd_bittiming_const; priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; + priv->can.tdc_const = &mcp251xfd_tdc_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | - CAN_CTRLMODE_CC_LEN8_DLC; + CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO | + CAN_CTRLMODE_TDC_MANUAL; set_bit(MCP251XFD_FLAGS_DOWN, priv->flags); priv->ndev = ndev; priv->spi = spi; @@ -2174,8 +2198,8 @@ static void mcp251xfd_remove(struct spi_device *spi) struct mcp251xfd_priv *priv = spi_get_drvdata(spi); struct net_device *ndev = priv->ndev; - can_rx_offload_del(&priv->offload); mcp251xfd_unregister(priv); + can_rx_offload_del(&priv->offload); spi->max_speed_hz = priv->spi_max_speed_hz_orig; free_candev(ndev); } diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 61d164ffb3ae97..dc2f4adac9bc96 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -326,6 +328,26 @@ static void b53_get_vlan_entry(struct b53_device *dev, u16 vid, } } +static void b53_set_eap_mode(struct b53_device *dev, int port, int mode) +{ + u64 eap_conf; + + if (is5325(dev) || is5365(dev) || dev->chip_id == BCM5389_DEVICE_ID) + return; + + b53_read64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), &eap_conf); + + if (is63xx(dev)) { + eap_conf &= ~EAP_MODE_MASK_63XX; + eap_conf |= (u64)mode << EAP_MODE_SHIFT_63XX; + } else { + eap_conf &= 
~EAP_MODE_MASK; + eap_conf |= (u64)mode << EAP_MODE_SHIFT; + } + + b53_write64(dev, B53_EAP_PAGE, B53_PORT_EAP_CONF(port), eap_conf); +} + static void b53_set_forwarding(struct b53_device *dev, int enable) { u8 mgmt; @@ -373,15 +395,17 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5); } + vc1 &= ~VC1_RX_MCST_FWD_EN; + if (enable) { vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID; - vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN; + vc1 |= VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; if (enable_filtering) { vc4 |= VC4_ING_VID_VIO_DROP << VC4_ING_VID_CHECK_S; vc5 |= VC5_DROP_VTABLE_MISS; } else { - vc4 |= VC4_ING_VID_VIO_FWD << VC4_ING_VID_CHECK_S; + vc4 |= VC4_NO_ING_VID_CHK << VC4_ING_VID_CHECK_S; vc5 &= ~VC5_DROP_VTABLE_MISS; } @@ -393,7 +417,7 @@ static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, } else { vc0 &= ~(VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID); - vc1 &= ~(VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN); + vc1 &= ~VC1_RX_MCST_UNTAG_EN; vc4 &= ~VC4_ING_VID_CHECK_MASK; vc5 &= ~VC5_DROP_VTABLE_MISS; @@ -576,6 +600,25 @@ static void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); } +int b53_setup_port(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + b53_port_set_ucast_flood(dev, port, true); + b53_port_set_mcast_flood(dev, port, true); + b53_port_set_learning(dev, port, false); + + /* Force all traffic to go to the CPU port to prevent the ASIC from + * trying to forward to bridged ports on matching FDB entries, then + * dropping frames because it isn't allowed to forward there. 
+ */ + if (dsa_is_user_port(ds, port)) + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + + return 0; +} +EXPORT_SYMBOL(b53_setup_port); + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -588,10 +631,6 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); - if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); if (ret) @@ -722,10 +761,6 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); b53_brcm_hdr_setup(dev->ds, port); - - b53_port_set_ucast_flood(dev, port, true); - b53_port_set_mcast_flood(dev, port, true); - b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -737,6 +772,15 @@ static void b53_enable_mib(struct b53_device *dev) b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); } +static void b53_enable_stp(struct b53_device *dev) +{ + u8 gc; + + b53_read8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, &gc); + gc |= GC_RX_BPDU_EN; + b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); +} + static u16 b53_default_pvid(struct b53_device *dev) { if (is5325(dev) || is5365(dev)) @@ -752,6 +796,22 @@ static bool b53_vlan_port_needs_forced_tagged(struct dsa_switch *ds, int port) return dev->tag_protocol == DSA_TAG_PROTO_NONE && dsa_is_cpu_port(ds, port); } +static bool b53_vlan_port_may_join_untagged(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + struct dsa_port *dp; + + if (!dev->vlan_filtering) + return true; + + dp = dsa_to_port(ds, port); + + if (dsa_port_is_cpu(dp)) + return true; + + return dp->bridge == NULL; +} + int b53_configure_vlan(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; @@ -770,7 +830,7 @@ int 
b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, dev->vlan_filtering); /* Create an untagged VLAN entry for the default PVID in case * CONFIG_VLAN_8021Q is disabled and there are no calls to @@ -778,26 +838,39 @@ int b53_configure_vlan(struct dsa_switch *ds) * entry. Do this only when the tagging protocol is not * DSA_TAG_PROTO_NONE */ + v = &dev->vlans[def_vid]; b53_for_each_port(dev, i) { - v = &dev->vlans[def_vid]; - v->members |= BIT(i); + if (!b53_vlan_port_may_join_untagged(ds, i)) + continue; + + vl.members |= BIT(i); if (!b53_vlan_port_needs_forced_tagged(ds, i)) - v->untag = v->members; - b53_write16(dev, B53_VLAN_PAGE, - B53_VLAN_PORT_DEF_TAG(i), def_vid); + vl.untag = vl.members; + b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(i), + def_vid); } + b53_set_vlan_entry(dev, def_vid, &vl); - /* Upon initial call we have not set-up any VLANs, but upon - * system resume, we need to restore all VLAN entries. - */ - for (vid = def_vid; vid < dev->num_vlans; vid++) { - v = &dev->vlans[vid]; + if (dev->vlan_filtering) { + /* Upon initial call we have not set-up any VLANs, but upon + * system resume, we need to restore all VLAN entries. 
+ */ + for (vid = def_vid + 1; vid < dev->num_vlans; vid++) { + v = &dev->vlans[vid]; - if (!v->members) - continue; + if (!v->members) + continue; + + b53_set_vlan_entry(dev, vid, v); + b53_fast_age_vlan(dev, vid); + } - b53_set_vlan_entry(dev, vid, v); - b53_fast_age_vlan(dev, vid); + b53_for_each_port(dev, i) { + if (!dsa_is_cpu_port(ds, i)) + b53_write16(dev, B53_VLAN_PAGE, + B53_VLAN_PORT_DEF_TAG(i), + dev->ports[i].pvid); + } } return 0; @@ -876,6 +949,7 @@ static int b53_switch_reset(struct b53_device *dev) } b53_enable_mib(dev); + b53_enable_stp(dev); return b53_flush_arl(dev, FAST_AGE_STATIC); } @@ -1115,7 +1189,9 @@ EXPORT_SYMBOL(b53_setup_devlink_resources); static int b53_setup(struct dsa_switch *ds) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; unsigned int port; + u16 pvid; int ret; /* Request bridge PVID untagged when DSA_TAG_PROTO_NONE is set @@ -1123,12 +1199,30 @@ static int b53_setup(struct dsa_switch *ds) */ ds->untag_bridge_pvid = dev->tag_protocol == DSA_TAG_PROTO_NONE; + /* The switch does not tell us the original VLAN for untagged + * packets, so keep the CPU port always tagged. 
+ */ + ds->untag_vlan_aware_bridge_pvid = true; + + /* Ageing time is set in seconds */ + ds->ageing_time_min = 1 * 1000; + ds->ageing_time_max = AGE_TIME_MAX * 1000; + ret = b53_reset_switch(dev); if (ret) { dev_err(ds->dev, "failed to reset switch\n"); return ret; } + /* setup default vlan for filtering mode */ + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + b53_for_each_port(dev, port) { + vl->members |= BIT(port); + if (!b53_vlan_port_needs_forced_tagged(ds, port)) + vl->untag |= BIT(port); + } + b53_reset_mib(dev); ret = b53_apply_config(dev); @@ -1229,41 +1323,17 @@ static void b53_adjust_63xx_rgmii(struct dsa_switch *ds, int port, phy_interface_t interface) { struct b53_device *dev = ds->priv; - u8 rgmii_ctrl = 0, off; - - if (port == dev->imp_port) - off = B53_RGMII_CTRL_IMP; - else - off = B53_RGMII_CTRL_P(port); - - b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl); + u8 rgmii_ctrl = 0; - switch (interface) { - case PHY_INTERFACE_MODE_RGMII_ID: - rgmii_ctrl |= (RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - break; - case PHY_INTERFACE_MODE_RGMII_RXID: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_TXC); - rgmii_ctrl |= RGMII_CTRL_DLL_RXC; - break; - case PHY_INTERFACE_MODE_RGMII_TXID: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC); - rgmii_ctrl |= RGMII_CTRL_DLL_TXC; - break; - case PHY_INTERFACE_MODE_RGMII: - default: - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - break; - } + b53_read8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), &rgmii_ctrl); + rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); - if (port != dev->imp_port) { - if (is63268(dev)) - rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE; + if (is63268(dev)) + rgmii_ctrl |= RGMII_CTRL_MII_OVERRIDE; - rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII; - } + rgmii_ctrl |= RGMII_CTRL_ENABLE_GMII; - b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl); + b53_write8(dev, B53_CTRL_PAGE, B53_RGMII_CTRL_P(port), rgmii_ctrl); dev_dbg(ds->dev, "Configured port %d for %s\n", port, phy_modes(interface)); @@ -1284,8 +1354,7 @@ 
static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port, * tx_clk aligned timing (restoring to reset defaults) */ b53_read8(dev, B53_CTRL_PAGE, off, &rgmii_ctrl); - rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC | - RGMII_CTRL_TIMING_SEL); + rgmii_ctrl &= ~(RGMII_CTRL_DLL_RXC | RGMII_CTRL_DLL_TXC); /* PHY_INTERFACE_MODE_RGMII_TXID means TX internal delay, make * sure that we enable the port TX clock internal delay to @@ -1305,7 +1374,10 @@ static void b53_adjust_531x5_rgmii(struct dsa_switch *ds, int port, rgmii_ctrl |= RGMII_CTRL_DLL_TXC; if (interface == PHY_INTERFACE_MODE_RGMII) rgmii_ctrl |= RGMII_CTRL_DLL_TXC | RGMII_CTRL_DLL_RXC; - rgmii_ctrl |= RGMII_CTRL_TIMING_SEL; + + if (dev->chip_id != BCM53115_DEVICE_ID) + rgmii_ctrl |= RGMII_CTRL_TIMING_SEL; + b53_write8(dev, B53_CTRL_PAGE, off, rgmii_ctrl); dev_info(ds->dev, "Configured port %d for %s\n", port, @@ -1369,6 +1441,10 @@ static void b53_phylink_get_caps(struct dsa_switch *ds, int port, __set_bit(PHY_INTERFACE_MODE_MII, config->supported_interfaces); __set_bit(PHY_INTERFACE_MODE_REVMII, config->supported_interfaces); + /* BCM63xx RGMII ports support RGMII */ + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + phy_interface_set_rgmii(config->supported_interfaces); + config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100; @@ -1408,7 +1484,7 @@ static void b53_phylink_mac_config(struct phylink_config *config, struct b53_device *dev = ds->priv; int port = dp->index; - if (is63xx(dev) && port >= B53_63XX_RGMII0) + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) b53_adjust_63xx_rgmii(ds, port, interface); if (mode == MLO_AN_FIXED) { @@ -1482,7 +1558,10 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, { struct b53_device *dev = ds->priv; - b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering); + if (dev->vlan_filtering != vlan_filtering) { + dev->vlan_filtering = vlan_filtering; + b53_apply_config(dev); + } return 0; } 
@@ -1507,7 +1586,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, if (vlan->vid >= dev->num_vlans) return -ERANGE; - b53_enable_vlan(dev, port, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, dev->vlan_filtering); return 0; } @@ -1520,18 +1599,29 @@ int b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + u16 old_pvid, new_pvid; int err; err = b53_vlan_prepare(ds, port, vlan); if (err) return err; - vl = &dev->vlans[vlan->vid]; + if (vlan->vid == 0) + return 0; + + old_pvid = dev->ports[port].pvid; + if (pvid) + new_pvid = vlan->vid; + else if (!pvid && vlan->vid == old_pvid) + new_pvid = b53_default_pvid(dev); + else + new_pvid = old_pvid; + dev->ports[port].pvid = new_pvid; - b53_get_vlan_entry(dev, vlan->vid, vl); + vl = &dev->vlans[vlan->vid]; - if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) - untagged = true; + if (dsa_is_cpu_port(ds, port)) + untagged = false; vl->members |= BIT(port); if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) @@ -1539,13 +1629,16 @@ int b53_vlan_add(struct dsa_switch *ds, int port, else vl->untag &= ~BIT(port); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); - if (pvid && !dsa_is_cpu_port(ds, port)) { + if (!dsa_is_cpu_port(ds, port) && new_pvid != old_pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid); - b53_fast_age_vlan(dev, vlan->vid); + new_pvid); + b53_fast_age_vlan(dev, old_pvid); } return 0; @@ -1560,20 +1653,25 @@ int b53_vlan_del(struct dsa_switch *ds, int port, struct b53_vlan *vl; u16 pvid; - b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); + if (vlan->vid == 0) + return 0; - vl = &dev->vlans[vlan->vid]; + pvid = dev->ports[port].pvid; - b53_get_vlan_entry(dev, vlan->vid, vl); + vl = &dev->vlans[vlan->vid]; vl->members &= 
~BIT(port); if (pvid == vlan->vid) pvid = b53_default_pvid(dev); + dev->ports[port].pvid = pvid; if (untagged && !b53_vlan_port_needs_forced_tagged(ds, port)) vl->untag &= ~(BIT(port)); + if (!dev->vlan_filtering) + return 0; + b53_set_vlan_entry(dev, vlan->vid, vl); b53_fast_age_vlan(dev, vlan->vid); @@ -1906,8 +2004,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack) { struct b53_device *dev = ds->priv; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; - u16 pvlan, reg; + u16 pvlan, reg, pvid; unsigned int i; /* On 7278, port 7 which connects to the ASP should only receive @@ -1916,15 +2015,26 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, if (dev->chip_id == BCM7278_DEVICE_ID && port == 7) return -EINVAL; - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + + if (dev->vlan_filtering) { + /* Make this port leave the all VLANs join since we will have + * proper VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, + reg); + } + + b53_get_vlan_entry(dev, pvid, vl); + vl->members &= ~BIT(port); + b53_set_vlan_entry(dev, pvid, vl); } b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); @@ -1944,6 +2054,9 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, pvlan |= BIT(i); } + /* Disable redirection of unknown SA to the CPU port */ + 
b53_set_eap_mode(dev, port, EAP_MODE_BASIC); + /* Configure the local port VLAN control membership to include * remote ports and update the local port bitmask */ @@ -1957,7 +2070,7 @@ EXPORT_SYMBOL(b53_br_join); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) { struct b53_device *dev = ds->priv; - struct b53_vlan *vl = &dev->vlans[0]; + struct b53_vlan *vl; s8 cpu_port = dsa_to_port(ds, port)->cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; @@ -1979,22 +2092,27 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) pvlan &= ~BIT(i); } + /* Enable redirection of unknown SA to the CPU port */ + b53_set_eap_mode(dev, port, EAP_MODE_SIMPLIFIED); + b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; pvid = b53_default_pvid(dev); + vl = &dev->vlans[pvid]; + + if (dev->vlan_filtering) { + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } - /* Make this port join all VLANs without VLAN entries */ - if (is58xx(dev)) { - b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); - } else { b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); b53_set_vlan_entry(dev, pvid, vl); } } @@ -2214,6 +2332,9 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) { int ret; + if (!b53_support_eee(ds, port)) + return 0; + ret = phy_init_eee(phy, false); if (ret) return 0; @@ -2228,7 +2349,7 @@ bool b53_support_eee(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; - return !is5325(dev) 
&& !is5365(dev); + return !is5325(dev) && !is5365(dev) && !is63xx(dev); } EXPORT_SYMBOL(b53_support_eee); @@ -2272,6 +2393,28 @@ static int b53_get_max_mtu(struct dsa_switch *ds, int port) return B53_MAX_MTU; } +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs) +{ + struct b53_device *dev = ds->priv; + u32 atc; + int reg; + + if (is63xx(dev)) + reg = B53_AGING_TIME_CONTROL_63XX; + else + reg = B53_AGING_TIME_CONTROL; + + atc = DIV_ROUND_CLOSEST(msecs, 1000); + + if (!is5325(dev) && !is5365(dev)) + atc |= AGE_CHANGE; + + b53_write32(dev, B53_MGMT_PAGE, reg, atc); + + return 0; +} +EXPORT_SYMBOL_GPL(b53_set_ageing_time); + static const struct phylink_mac_ops b53_phylink_mac_ops = { .mac_select_pcs = b53_phylink_mac_select_pcs, .mac_config = b53_phylink_mac_config, @@ -2290,10 +2433,12 @@ static const struct dsa_switch_ops b53_switch_ops = { .phy_read = b53_phy_read16, .phy_write = b53_phy_write16, .phylink_get_caps = b53_phylink_get_caps, + .port_setup = b53_setup_port, .port_enable = b53_enable_port, .port_disable = b53_disable_port, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, @@ -2747,6 +2892,7 @@ struct b53_device *b53_switch_alloc(struct device *base, ds->ops = &b53_switch_ops; ds->phylink_mac_ops = &b53_phylink_mac_ops; dev->vlan_enabled = true; + dev->vlan_filtering = false; /* Let DSA handle the case were multiple bridges span the same switch * device and different VLAN awareness settings are requested, which * would be breaking filtering semantics for any of the other bridge diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 0166c37a13a7fc..a5ef7071ba07b1 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -96,6 +96,7 @@ struct b53_pcs { struct b53_port { u16 vlan_ctl_mask; + u16 pvid; struct ethtool_keee eee; 
}; @@ -147,6 +148,7 @@ struct b53_device { unsigned int num_vlans; struct b53_vlan *vlans; bool vlan_enabled; + bool vlan_filtering; unsigned int num_ports; struct b53_port *ports; @@ -341,6 +343,7 @@ void b53_get_strings(struct dsa_switch *ds, int port, u32 stringset, void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds, int port, int sset); void b53_get_ethtool_phy_stats(struct dsa_switch *ds, int port, uint64_t *data); +int b53_set_ageing_time(struct dsa_switch *ds, unsigned int msecs); int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack); void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); @@ -382,6 +385,7 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_setup_port(struct dsa_switch *ds, int port); int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index bfbcb66bef6626..1fbc5a204bc721 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -50,6 +50,9 @@ /* Jumbo Frame Registers */ #define B53_JUMBO_PAGE 0x40 +/* EAP Registers */ +#define B53_EAP_PAGE 0x42 + /* EEE Control Registers Page */ #define B53_EEE_PAGE 0x92 @@ -217,6 +220,13 @@ #define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */ #define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */ +/* Aging Time control register (32 bit) */ +#define B53_AGING_TIME_CONTROL 0x06 +#define B53_AGING_TIME_CONTROL_63XX 0x08 +#define AGE_CHANGE BIT(20) +#define AGE_TIME_MASK 0x7ffff +#define AGE_TIME_MAX 1048575 + /* Mirror capture control 
register (16 bit) */ #define B53_MIR_CAP_CTL 0x10 #define CAP_PORT_MASK 0xf @@ -480,6 +490,17 @@ #define JMS_MIN_SIZE 1518 #define JMS_MAX_SIZE 9724 +/************************************************************************* + * EAP Page Registers + *************************************************************************/ +#define B53_PORT_EAP_CONF(i) (0x20 + 8 * (i)) +#define EAP_MODE_SHIFT 51 +#define EAP_MODE_SHIFT_63XX 50 +#define EAP_MODE_MASK (0x3ull << EAP_MODE_SHIFT) +#define EAP_MODE_MASK_63XX (0x3ull << EAP_MODE_SHIFT_63XX) +#define EAP_MODE_BASIC 0 +#define EAP_MODE_SIMPLIFIED 3 + /************************************************************************* * EEE Configuration Page Registers *************************************************************************/ diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index fa2bf3fa90191a..960685596093b6 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1230,10 +1230,12 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .resume = bcm_sf2_sw_resume, .get_wol = bcm_sf2_sw_get_wol, .set_wol = bcm_sf2_sw_set_wol, + .port_setup = b53_setup_port, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, .support_eee = b53_support_eee, .set_mac_eee = b53_set_mac_eee, + .set_ageing_time = b53_set_ageing_time, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 89f0796894af66..f95a9aac56ee1b 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -265,16 +265,70 @@ static void ksz_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); +/** + * ksz_phylink_mac_disable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * + * This function is a dummy handler. 
See ksz_phylink_mac_enable_tx_lpi() for + * a detailed explanation of EEE/LPI handling in KSZ switches. + */ +static void ksz_phylink_mac_disable_tx_lpi(struct phylink_config *config) +{ +} + +/** + * ksz_phylink_mac_enable_tx_lpi() - Callback to signal LPI support (Dummy) + * @config: phylink config structure + * @timer: timer value before entering LPI (unused) + * @tx_clock_stop: whether to stop the TX clock in LPI mode (unused) + * + * This function signals to phylink that the driver architecture supports + * LPI management, enabling phylink to control EEE advertisement during + * negotiation according to IEEE Std 802.3 (Clause 78). + * + * Hardware Management of EEE/LPI State: + * For KSZ switch ports with integrated PHYs (e.g., KSZ9893R ports 1-2), + * observation and testing suggest that the actual EEE / Low Power Idle (LPI) + * state transitions are managed autonomously by the hardware based on + * the auto-negotiation results. (Note: While the datasheet describes EEE + * operation based on negotiation, it doesn't explicitly detail the internal + * MAC/PHY interaction, so autonomous hardware management of the MAC state + * for LPI is inferred from observed behavior). + * This hardware control, consistent with the switch's ability to operate + * autonomously via strapping, means MAC-level software intervention is not + * required or exposed for managing the LPI state once EEE is negotiated. + * (Ref: KSZ9893R Data Sheet DS00002420D, primarily Section 4.7.5 explaining + * EEE, also Sections 4.1.7 on Auto-Negotiation and 3.2.1 on Configuration + * Straps). + * + * Additionally, ports configured as MAC interfaces (e.g., KSZ9893R port 3) + * lack documented MAC-level LPI control. + * + * Therefore, this callback performs no action and serves primarily to inform + * phylink of LPI awareness and to document the inferred hardware behavior. 
+ * + * Returns: 0 (Always success) + */ +static int ksz_phylink_mac_enable_tx_lpi(struct phylink_config *config, + u32 timer, bool tx_clock_stop) +{ + return 0; +} + static const struct phylink_mac_ops ksz88x3_phylink_mac_ops = { .mac_config = ksz88x3_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct phylink_mac_ops ksz8_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz8_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz88xx_dev_ops = { @@ -358,6 +412,8 @@ static const struct phylink_mac_ops ksz9477_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops ksz9477_dev_ops = { @@ -401,6 +457,8 @@ static const struct phylink_mac_ops lan937x_phylink_mac_ops = { .mac_config = ksz_phylink_mac_config, .mac_link_down = ksz_phylink_mac_link_down, .mac_link_up = ksz9477_phylink_mac_link_up, + .mac_disable_tx_lpi = ksz_phylink_mac_disable_tx_lpi, + .mac_enable_tx_lpi = ksz_phylink_mac_enable_tx_lpi, }; static const struct ksz_dev_ops lan937x_dev_ops = { @@ -2016,6 +2074,18 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port, if (dev->dev_ops->get_caps) dev->dev_ops->get_caps(dev, port, config); + + if (ds->ops->support_eee && ds->ops->support_eee(ds, port)) { + memcpy(config->lpi_interfaces, config->supported_interfaces, + sizeof(config->lpi_interfaces)); + + config->lpi_capabilities = MAC_100FD; + if (dev->info->gbit_capable[port]) + config->lpi_capabilities 
|= MAC_1000FD; + + /* EEE is fully operational */ + config->eee_enabled_default = true; + } } void ksz_r_mib_stats64(struct ksz_device *dev, int port) @@ -3008,31 +3078,6 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; - case KSZ8567_CHIP_ID: - /* KSZ8567R Errata DS80000752C Module 4 */ - case KSZ8765_CHIP_ID: - case KSZ8794_CHIP_ID: - case KSZ8795_CHIP_ID: - /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ - case KSZ9477_CHIP_ID: - /* KSZ9477S Errata DS80000754A Module 4 */ - case KSZ9567_CHIP_ID: - /* KSZ9567S Errata DS80000756A Module 4 */ - case KSZ9896_CHIP_ID: - /* KSZ9896C Errata DS80000757A Module 3 */ - case KSZ9897_CHIP_ID: - case LAN9646_CHIP_ID: - /* KSZ9897R Errata DS80000758C Module 4 */ - /* Energy Efficient Ethernet (EEE) feature select must be manually disabled - * The EEE feature is enabled by default, but it is not fully - * operational. It must be manually disabled through register - * controls. If not disabled, the PHY ports can auto-negotiate - * to enable EEE, and this feature can cause link drops when - * linked to another device supporting EEE. - * - * The same item appears in the errata for all switches above. - */ - return MICREL_NO_EEE; } return 0; @@ -3466,6 +3511,20 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) return -EOPNOTSUPP; } +/** + * ksz_support_eee - Determine Energy Efficient Ethernet (EEE) support for a + * port + * @ds: Pointer to the DSA switch structure + * @port: Port number to check + * + * This function also documents devices where EEE was initially advertised but + * later withdrawn due to reliability issues, as described in official errata + * documents. These devices are explicitly listed to record known limitations, + * even if there is no technical necessity for runtime checks. + * + * Returns: true if the internal PHY on the given port supports fully + * operational EEE, false otherwise. 
+ */ static bool ksz_support_eee(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; @@ -3475,15 +3534,35 @@ static bool ksz_support_eee(struct dsa_switch *ds, int port) switch (dev->chip_id) { case KSZ8563_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9893_CHIP_ID: + return true; case KSZ8567_CHIP_ID: + /* KSZ8567R Errata DS80000752C Module 4 */ + case KSZ8765_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8795_CHIP_ID: + /* KSZ879x/KSZ877x/KSZ876x Errata DS80000687C Module 2 */ case KSZ9477_CHIP_ID: - case KSZ9563_CHIP_ID: + /* KSZ9477S Errata DS80000754A Module 4 */ case KSZ9567_CHIP_ID: - case KSZ9893_CHIP_ID: + /* KSZ9567S Errata DS80000756A Module 4 */ case KSZ9896_CHIP_ID: + /* KSZ9896C Errata DS80000757A Module 3 */ case KSZ9897_CHIP_ID: case LAN9646_CHIP_ID: - return true; + /* KSZ9897R Errata DS80000758C Module 4 */ + /* Energy Efficient Ethernet (EEE) feature select must be + * manually disabled + * The EEE feature is enabled by default, but it is not fully + * operational. It must be manually disabled through register + * controls. If not disabled, the PHY ports can auto-negotiate + * to enable EEE, and this feature can cause link drops when + * linked to another device supporting EEE. + * + * The same item appears in the errata for all switches above. 
+ */ + break; } return false; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d70399bce5b9ee..c5d6628d798075 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2419,6 +2419,9 @@ mt7531_setup_common(struct dsa_switch *ds) struct mt7530_priv *priv = ds->priv; int ret, i; + ds->assisted_learning_on_cpu_port = true; + ds->mtu_enforcement_ingress = true; + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ @@ -2571,9 +2574,6 @@ mt7531_setup(struct dsa_switch *ds) if (ret) return ret; - ds->assisted_learning_on_cpu_port = true; - ds->mtu_enforcement_ingress = true; - return 0; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 29a89ab4b78946..08db846cda8dec 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1852,6 +1852,8 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, if (!chip->info->ops->vtu_getnext) return -EOPNOTSUPP; + memset(entry, 0, sizeof(*entry)); + entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip); entry->valid = false; @@ -1960,7 +1962,16 @@ static int mv88e6xxx_mst_put(struct mv88e6xxx_chip *chip, u8 sid) struct mv88e6xxx_mst *mst, *tmp; int err; - if (!sid) + /* If the SID is zero, it is for a VLAN mapped to the default MSTI, + * and mv88e6xxx_stu_setup() made sure it is always present, and thus, + * should not be removed here. + * + * If the chip lacks STU support, numerically the "sid" variable will + * happen to also be zero, but we don't want to rely on that fact, so + * we explicitly test that first. In that case, there is also nothing + * to do here. 
+ */ + if (!mv88e6xxx_has_stu(chip) || !sid) return 0; list_for_each_entry_safe(mst, tmp, &chip->msts, node) { diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 795c8df7b6a743..195460a0a0d418 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -736,7 +736,8 @@ void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) - dsa_devlink_region_destroy(chip->regions[i]); + if (chip->regions[i]) + dsa_devlink_region_destroy(chip->regions[i]); } void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 940f1b71226d64..7b35d24c38d765 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1543,7 +1543,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) struct tc_taprio_qopt_offload *taprio; struct ocelot_port *ocelot_port; struct timespec64 base_ts; - int port; + int i, port; u32 val; mutex_lock(&ocelot->fwd_domain_lock); @@ -1575,6 +1575,9 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, QSYS_PARAM_CFG_REG_3); + for (i = 0; i < taprio->num_entries; i++) + vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]); + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, QSYS_TAS_PARAM_CFG_CTRL); diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index f8454f3b6f9c5d..f674c400f05b29 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2081,6 +2081,7 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, switch (state) { case BR_STATE_DISABLED: case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: /* From UM10944 description of DRPDTAG (why put this 
there?): * "Management traffic flows to the port regardless of the state * of the INGRESS flag". So BPDUs are still be allowed to pass. @@ -2090,11 +2091,6 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, mac[port].egress = false; mac[port].dyn_learn = false; break; - case BR_STATE_LISTENING: - mac[port].ingress = true; - mac[port].egress = false; - mac[port].dyn_learn = false; - break; case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index d748dc6de92367..af28a9300a15c7 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -67,15 +67,6 @@ static void airoha_qdma_irq_disable(struct airoha_qdma *qdma, int index, airoha_qdma_set_irqmask(qdma, index, mask, 0); } -static bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) -{ - /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. - * GDM{2,3,4} can be used as wan port connected to an external - * phy module. 
- */ - return port->id == 1; -} - static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) { struct airoha_eth *eth = port->qdma->eth; @@ -89,6 +80,8 @@ static void airoha_set_macaddr(struct airoha_gdm_port *port, const u8 *addr) val = (addr[3] << 16) | (addr[4] << 8) | addr[5]; airoha_fe_wr(eth, REG_FE_MAC_LMIN(reg), val); airoha_fe_wr(eth, REG_FE_MAC_LMAX(reg), val); + + airoha_ppe_init_upd_mem(port); } static void airoha_set_gdm_port_fwd_cfg(struct airoha_eth *eth, u32 addr, @@ -614,7 +607,6 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) struct airoha_queue_entry *e = &q->entry[q->tail]; struct airoha_qdma_desc *desc = &q->desc[q->tail]; u32 hash, reason, msg1 = le32_to_cpu(desc->msg1); - dma_addr_t dma_addr = le32_to_cpu(desc->addr); struct page *page = virt_to_head_page(e->buf); u32 desc_ctrl = le32_to_cpu(desc->ctrl); struct airoha_gdm_port *port; @@ -623,22 +615,16 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) if (!(desc_ctrl & QDMA_DESC_DONE_MASK)) break; - if (!dma_addr) - break; - - len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); - if (!len) - break; - q->tail = (q->tail + 1) % q->ndesc; q->queued--; - dma_sync_single_for_cpu(eth->dev, dma_addr, + dma_sync_single_for_cpu(eth->dev, e->dma_addr, SKB_WITH_OVERHEAD(q->buf_size), dir); + len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl); data_len = q->skb ? 
q->buf_size : SKB_WITH_OVERHEAD(q->buf_size); - if (data_len < len) + if (!len || data_len < len) goto free_frag; p = airoha_qdma_get_gdm_port(eth, desc); @@ -701,9 +687,12 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) q->skb = NULL; continue; free_frag: - page_pool_put_full_page(q->page_pool, page, true); - dev_kfree_skb(q->skb); - q->skb = NULL; + if (q->skb) { + dev_kfree_skb(q->skb); + q->skb = NULL; + } else { + page_pool_put_full_page(q->page_pool, page, true); + } } airoha_qdma_fill_rx_queue(q); @@ -1072,7 +1061,7 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | - LMGR_INIT_START); + LMGR_INIT_START | LMGR_SRAM_MODE_MASK); return read_poll_timeout(airoha_qdma_rr, status, !(status & LMGR_INIT_START), USEC_PER_MSEC, @@ -2545,7 +2534,15 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, if (err) return err; - return register_netdev(dev); + err = register_netdev(dev); + if (err) + goto free_metadata_dst; + + return 0; + +free_metadata_dst: + airoha_metadata_dst_free(port); + return err; } static int airoha_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index ec8908f904c619..2bf6b1a2dd9b03 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -532,6 +532,15 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val); #define airoha_qdma_clear(qdma, offset, val) \ airoha_rmw((qdma)->regs, (offset), (val), 0) +static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) +{ + /* GDM1 port on EN7581 SoC is connected to the lan dsa switch. + * GDM{2,3,4} can be used as wan port connected to an external + * phy module. 
+ */ + return port->id == 1; +} + bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); @@ -540,6 +549,7 @@ int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); +void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe, u32 hash); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 7a5710f9ccf6a4..ead0625e781f57 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -104,12 +104,14 @@ struct ppe_mbox_data { u8 xpon_hal_api; u8 wan_xsi; u8 ct_joyme4; - int ppe_type; - int wan_mode; - int wan_sel; + u8 max_packet; + u8 rsv[3]; + u32 ppe_type; + u32 wan_mode; + u32 wan_sel; } init_info; struct { - int func_id; + u32 func_id; u32 size; u32 data; } set_info; diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index f10dab935cab6f..1b8f21f808890e 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -206,6 +206,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, int dsa_port = airoha_get_dsa_port(&dev); struct airoha_foe_mac_info_common *l2; u32 qdata, ports_pad, val; + u8 smac_id = 0xf; memset(hwe, 0, sizeof(*hwe)); @@ -234,6 +235,14 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, else pse_port = 2; /* uplink relies on GDM2 loopback */ val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port); + + /* For downlink traffic consume SRAM memory for hw forwarding + * descriptors queue. 
+ */ + if (airhoa_is_lan_gdm_port(port)) + val |= AIROHA_FOE_IB2_FAST_PATH; + + smac_id = port->id; } if (is_multicast_ether_addr(data->eth.h_dest)) @@ -274,7 +283,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, hwe->ipv4.l2.src_mac_lo = get_unaligned_be16(data->eth.h_source + 4); } else { - l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, 0xf); + l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id); } if (data->vlan.num) { @@ -862,6 +871,27 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash) airoha_ppe_foe_insert_entry(ppe, hash); } +void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) +{ + struct airoha_eth *eth = port->qdma->eth; + struct net_device *dev = port->dev; + const u8 *addr = dev->dev_addr; + u32 val; + + val = (addr[2] << 24) | (addr[3] << 16) | (addr[4] << 8) | addr[5]; + airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val); + airoha_fe_wr(eth, REG_UPDMEM_CTRL(0), + FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) | + PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); + + val = (addr[0] << 8) | addr[1]; + airoha_fe_wr(eth, REG_UPDMEM_DATA(0), val); + airoha_fe_wr(eth, REG_UPDMEM_CTRL(0), + FIELD_PREP(PPE_UPDMEM_ADDR_MASK, port->id) | + FIELD_PREP(PPE_UPDMEM_OFFSET_MASK, 1) | + PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); +} + int airoha_ppe_init(struct airoha_eth *eth) { struct airoha_ppe *ppe; diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 8146cde4e8ba37..57bff8d2de276b 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -312,6 +312,16 @@ #define REG_PPE_RAM_BASE(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x320) #define REG_PPE_RAM_ENTRY(_m, _n) (REG_PPE_RAM_BASE(_m) + ((_n) << 2)) +#define REG_UPDMEM_CTRL(_n) (((_n) ? 
PPE2_BASE : PPE1_BASE) + 0x370) +#define PPE_UPDMEM_ACK_MASK BIT(31) +#define PPE_UPDMEM_ADDR_MASK GENMASK(11, 8) +#define PPE_UPDMEM_OFFSET_MASK GENMASK(7, 4) +#define PPE_UPDMEM_SEL_MASK GENMASK(3, 2) +#define PPE_UPDMEM_WR_MASK BIT(1) +#define PPE_UPDMEM_REQ_MASK BIT(0) + +#define REG_UPDMEM_DATA(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x374) + #define REG_FE_GDM_TX_OK_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x280) #define REG_FE_GDM_TX_OK_BYTE_CNT_H(_n) (GDM_BASE(_n) + 0x284) #define REG_FE_GDM_TX_ETH_PKT_CNT_H(_n) (GDM_BASE(_n) + 0x288) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index c83a0a80d5334e..506f682d15c10a 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -5,11 +5,6 @@ #include "core.h" -struct pdsc_wait_context { - struct pdsc_qcq *qcq; - struct completion wait_completion; -}; - static int pdsc_process_notifyq(struct pdsc_qcq *qcq) { union pds_core_notifyq_comp *comp; @@ -109,10 +104,10 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) q_info = &q->info[q->tail_idx]; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); - /* Copy out the completion data */ - memcpy(q_info->dest, comp, sizeof(*comp)); - - complete_all(&q_info->wc->wait_completion); + if (!completion_done(&q_info->completion)) { + memcpy(q_info->dest, comp, sizeof(*comp)); + complete(&q_info->completion); + } if (cq->tail_idx == cq->num_descs - 1) cq->done_color = !cq->done_color; @@ -162,8 +157,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) static int __pdsc_adminq_post(struct pdsc *pdsc, struct pdsc_qcq *qcq, union pds_core_adminq_cmd *cmd, - union pds_core_adminq_comp *comp, - struct pdsc_wait_context *wc) + union pds_core_adminq_comp *comp) { struct pdsc_queue *q = &qcq->q; struct pdsc_q_info *q_info; @@ -205,9 +199,9 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Post the request */ index = q->head_idx; q_info = &q->info[index]; - q_info->wc = wc; q_info->dest = 
comp; memcpy(q_info->desc, cmd, sizeof(*cmd)); + reinit_completion(&q_info->completion); dev_dbg(pdsc->dev, "head_idx %d tail_idx %d\n", q->head_idx, q->tail_idx); @@ -231,16 +225,13 @@ int pdsc_adminq_post(struct pdsc *pdsc, union pds_core_adminq_comp *comp, bool fast_poll) { - struct pdsc_wait_context wc = { - .wait_completion = - COMPLETION_INITIALIZER_ONSTACK(wc.wait_completion), - }; unsigned long poll_interval = 1; unsigned long poll_jiffies; unsigned long time_limit; unsigned long time_start; unsigned long time_done; unsigned long remaining; + struct completion *wc; int err = 0; int index; @@ -250,20 +241,19 @@ int pdsc_adminq_post(struct pdsc *pdsc, return -ENXIO; } - wc.qcq = &pdsc->adminqcq; - index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); + index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp); if (index < 0) { err = index; goto err_out; } + wc = &pdsc->adminqcq.q.info[index].completion; time_start = jiffies; time_limit = time_start + HZ * pdsc->devcmd_timeout; do { /* Timeslice the actual wait to catch IO errors etc early */ poll_jiffies = msecs_to_jiffies(poll_interval); - remaining = wait_for_completion_timeout(&wc.wait_completion, - poll_jiffies); + remaining = wait_for_completion_timeout(wc, poll_jiffies); if (remaining) break; @@ -292,9 +282,11 @@ int pdsc_adminq_post(struct pdsc *pdsc, dev_dbg(pdsc->dev, "%s: elapsed %d msecs\n", __func__, jiffies_to_msecs(time_done - time_start)); - /* Check the results */ - if (time_after_eq(time_done, time_limit)) + /* Check the results and clear an un-completed timeout */ + if (time_after_eq(time_done, time_limit) && !completion_done(wc)) { err = -ETIMEDOUT; + complete(wc); + } dev_dbg(pdsc->dev, "read admin queue completion idx %d:\n", index); dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index eeb72b1809eabd..92f359f2b44920 100644 --- 
a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -107,9 +107,6 @@ int pds_client_adminq_cmd(struct pds_auxiliary_dev *padev, dev_dbg(pf->dev, "%s: %s opcode %d\n", __func__, dev_name(&padev->aux_dev.dev), req->opcode); - if (pf->state) - return -ENXIO; - /* Wrap the client's request */ cmd.client_request.opcode = PDS_AQ_CMD_CLIENT_CMD; cmd.client_request.client_id = cpu_to_le16(padev->client_id); @@ -189,7 +186,6 @@ void pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf, pds_client_unregister(pf, padev->client_id); auxiliary_device_delete(&padev->aux_dev); auxiliary_device_uninit(&padev->aux_dev); - padev->client_id = 0; *pd_ptr = NULL; mutex_unlock(&pf->config_lock); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 1eb0d92786f715..9512aa4083f054 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -167,8 +167,10 @@ static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa) q->base = base; q->base_pa = base_pa; - for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) + for (i = 0, cur = q->info; i < q->num_descs; i++, cur++) { cur->desc = base + (i * q->desc_size); + init_completion(&cur->completion); + } } static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa) @@ -325,10 +327,7 @@ static int pdsc_core_init(struct pdsc *pdsc) size_t sz; int err; - /* Scale the descriptor ring length based on number of CPUs and VFs */ - numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus()); - numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev); - numdescs = roundup_pow_of_two(numdescs); + numdescs = PDSC_ADMINQ_MAX_LENGTH; err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq", PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR, numdescs, diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index 0bf320c4308369..0b53a1fab46d02 100644 --- 
a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -16,7 +16,7 @@ #define PDSC_WATCHDOG_SECS 5 #define PDSC_QUEUE_NAME_MAX_SZ 16 -#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */ +#define PDSC_ADMINQ_MAX_LENGTH 16 /* must be a power of two */ #define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */ #define PDSC_TEARDOWN_RECOVERY false #define PDSC_TEARDOWN_REMOVING true @@ -96,7 +96,7 @@ struct pdsc_q_info { unsigned int bytes; unsigned int nbufs; struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS]; - struct pdsc_wait_context *wc; + struct completion completion; void *dest; }; diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index ac37a4e738ae7d..04c5e3abd8d706 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -154,8 +154,9 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) debugfs_create_u32("index", 0400, intr_dentry, &intr->index); debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector); - intr_ctrl_regset = kzalloc(sizeof(*intr_ctrl_regset), - GFP_KERNEL); + intr_ctrl_regset = devm_kzalloc(pdsc->dev, + sizeof(*intr_ctrl_regset), + GFP_KERNEL); if (!intr_ctrl_regset) return; intr_ctrl_regset->regs = intr_ctrl_regs; diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index c5c787df61a4ec..d8dc39da4161fb 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -105,7 +105,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, .fw_control.opcode = PDS_CORE_CMD_FW_CONTROL, .fw_control.oper = PDS_CORE_FW_GET_LIST, }; - struct pds_core_fw_list_info fw_list; + struct pds_core_fw_list_info fw_list = {}; struct pdsc *pdsc = devlink_priv(dl); union pds_core_dev_comp comp; char buf[32]; @@ -118,8 +118,6 @@ int pdsc_dl_info_get(struct devlink *dl, struct 
devlink_info_req *req, if (!err) memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); - if (err && err != -EIO) - return err; listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names)); for (i = 0; i < listlen; i++) { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 230726d7b74f63..d41b58fad37bbf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -373,8 +373,13 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata, } /* Set up the header page info */ - xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, - XGBE_SKB_ALLOC_SIZE); + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + XGBE_SKB_ALLOC_SIZE); + } else { + xgbe_set_buffer_data(&rdata->rx.hdr, &ring->rx_hdr_pa, + pdata->rx_buf_size); + } /* Set up the buffer page info */ xgbe_set_buffer_data(&rdata->rx.buf, &ring->rx_buf_pa, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index f393228d41c7be..f1b0fb02b3cd14 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -320,6 +320,18 @@ static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata) XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE); } +static void xgbe_disable_sph_mode(struct xgbe_prv_data *pdata) +{ + unsigned int i; + + for (i = 0; i < pdata->channel_count; i++) { + if (!pdata->channel[i]->rx_ring) + break; + + XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 0); + } +} + static int xgbe_write_rss_reg(struct xgbe_prv_data *pdata, unsigned int type, unsigned int index, unsigned int val) { @@ -3545,8 +3557,12 @@ static int xgbe_init(struct xgbe_prv_data *pdata) xgbe_config_tx_coalesce(pdata); xgbe_config_rx_buffer_size(pdata); xgbe_config_tso_mode(pdata); - xgbe_config_sph_mode(pdata); - 
xgbe_config_rss(pdata); + + if (pdata->netdev->features & NETIF_F_RXCSUM) { + xgbe_config_sph_mode(pdata); + xgbe_config_rss(pdata); + } + desc_if->wrapper_tx_desc_init(pdata); desc_if->wrapper_rx_desc_init(pdata); xgbe_enable_dma_interrupts(pdata); @@ -3702,5 +3718,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if) hw_if->disable_vxlan = xgbe_disable_vxlan; hw_if->set_vxlan_id = xgbe_set_vxlan_id; + /* For Split Header*/ + hw_if->enable_sph = xgbe_config_sph_mode; + hw_if->disable_sph = xgbe_disable_sph_mode; + DBGPR("<--xgbe_init_function_ptrs\n"); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index d84a310dfcd40e..8e09ad8fa022a3 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2257,10 +2257,17 @@ static int xgbe_set_features(struct net_device *netdev, if (ret) return ret; - if ((features & NETIF_F_RXCSUM) && !rxcsum) + if ((features & NETIF_F_RXCSUM) && !rxcsum) { + hw_if->enable_sph(pdata); + hw_if->enable_vxlan(pdata); hw_if->enable_rx_csum(pdata); - else if (!(features & NETIF_F_RXCSUM) && rxcsum) + schedule_work(&pdata->restart_work); + } else if (!(features & NETIF_F_RXCSUM) && rxcsum) { + hw_if->disable_sph(pdata); + hw_if->disable_vxlan(pdata); hw_if->disable_rx_csum(pdata); + schedule_work(&pdata->restart_work); + } if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !rxvlan) hw_if->enable_rx_vlan_stripping(pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index d85386cac8d166..ed5d43c16d0e23 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -865,6 +865,10 @@ struct xgbe_hw_if { void (*enable_vxlan)(struct xgbe_prv_data *); void (*disable_vxlan)(struct xgbe_prv_data *); void (*set_vxlan_id)(struct xgbe_prv_data *); + + /* For Split Header */ + void (*enable_sph)(struct xgbe_prv_data *pdata); + void (*disable_sph)(struct xgbe_prv_data *pdata); }; /* 
This structure represents implementation specific routines for an diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index eeec8bf17cf458..1bd4313215d715 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -143,7 +143,7 @@ config BNX2X depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select ZLIB_INFLATE - select LIBCRC32C + select CRC32 select MDIO help This driver supports Broadcom NetXtremeII 10 gigabit Ethernet cards. @@ -207,7 +207,7 @@ config BNXT depends on PCI depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER - select LIBCRC32C + select CRC32 select NET_DEVLINK select PAGE_POOL select DIMLIB diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8725e1e1390825..6afc2ab6fad228 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -787,7 +787,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { - txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) /* set SKB to err so PTP worker will clean up */ @@ -795,7 +795,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); - txr->tx_buf_ring[txr->tx_prod].skb = NULL; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].skb = NULL; dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } @@ -2015,6 +2015,7 @@ static struct sk_buff *bnxt_rx_vlan(struct sk_buff *skb, u8 cmp_type, } return skb; vlan_err: + skb_mark_for_recycle(skb); dev_kfree_skb(skb); return NULL; } @@ -3414,6 +3415,9 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) bnxt_free_one_tx_ring_skbs(bp, txr, i); } + + if (bp->ptp_cfg && 
!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + bnxt_ptp_free_txts_skbs(bp->ptp_cfg); } static void bnxt_free_one_rx_ring(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) @@ -11599,6 +11603,9 @@ static void bnxt_init_napi(struct bnxt *bp) poll_fn = bnxt_poll_p5; else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) cp_nr_rings--; + + set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); + for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; netif_napi_add_config_locked(bp->dev, &bnapi->napi, poll_fn, @@ -12318,12 +12325,15 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp; struct hwrm_func_drv_if_change_input *req; - bool fw_reset = !bp->irq_tbl; bool resc_reinit = false; bool caps_change = false; int rc, retry = 0; + bool fw_reset; u32 flags = 0; + fw_reset = (bp->fw_reset_state == BNXT_FW_RESET_STATE_ABORT); + bp->fw_reset_state = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -12392,13 +12402,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return rc; } + /* IRQ will be initialized later in bnxt_request_irq()*/ bnxt_clear_int_mode(bp); - rc = bnxt_init_int_mode(bp); - if (rc) { - clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state); - netdev_err(bp->dev, "init int mode failed\n"); - return rc; - } } rc = bnxt_cancel_reservations(bp, fw_reset); } @@ -12797,8 +12802,6 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); - if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) - WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) @@ -14010,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp) netdev_unlock(bp->dev); } +/* Same as bnxt_lock_sp() with additional rtnl_lock */ +static void bnxt_rtnl_lock_sp(struct bnxt *bp) +{ + 
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + rtnl_lock(); + netdev_lock(bp->dev); +} + +static void bnxt_rtnl_unlock_sp(struct bnxt *bp) +{ + set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); + netdev_unlock(bp->dev); + rtnl_unlock(); +} + /* Only called from bnxt_sp_task() */ static void bnxt_reset(struct bnxt *bp, bool silent) { - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (test_bit(BNXT_STATE_OPEN, &bp->state)) bnxt_reset_task(bp, silent); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } /* Only called from bnxt_sp_task() */ @@ -14024,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) { int i; - bnxt_lock_sp(bp); + bnxt_rtnl_lock_sp(bp); if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); return; } /* Disable and flush TPA before resetting the RX ring */ @@ -14065,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) } if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, true); - bnxt_unlock_sp(bp); + bnxt_rtnl_unlock_sp(bp); } static void bnxt_fw_fatal_close(struct bnxt *bp) @@ -14833,7 +14851,7 @@ static void bnxt_fw_reset_abort(struct bnxt *bp, int rc) clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); if (bp->fw_reset_state != BNXT_FW_RESET_STATE_POLL_VF) bnxt_dl_health_fw_status_update(bp, false); - bp->fw_reset_state = 0; + bp->fw_reset_state = BNXT_FW_RESET_STATE_ABORT; netif_close(bp->dev); } @@ -14957,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING; fallthrough; case BNXT_FW_RESET_STATE_OPENING: - while (!netdev_trylock(bp->dev)) { + while (!rtnl_trylock()) { bnxt_queue_fw_reset_work(bp, HZ / 10); return; } + netdev_lock(bp->dev); rc = bnxt_open(bp->dev); if (rc) { netdev_err(bp->dev, "bnxt_open() failed during FW reset\n"); bnxt_fw_reset_abort(bp, rc); netdev_unlock(bp->dev); + rtnl_unlock(); goto ulp_start; } @@ -14985,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_dl_health_fw_status_update(bp, 
true); } netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, 0); bnxt_reenable_sriov(bp); netdev_lock(bp->dev); @@ -15933,7 +15954,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) rc); napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - bnxt_reset_task(bp, true); + netif_close(dev); return rc; } @@ -16003,8 +16024,8 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_rdma_aux_device_del(bp); - bnxt_ptp_clear(bp); unregister_netdev(dev); + bnxt_ptp_clear(bp); bnxt_rdma_aux_device_uninit(bp); @@ -16749,6 +16770,7 @@ static int bnxt_resume(struct device *device) struct bnxt *bp = netdev_priv(dev); int rc = 0; + rtnl_lock(); netdev_lock(dev); rc = pci_enable_device(bp->pdev); if (rc) { @@ -16793,6 +16815,7 @@ static int bnxt_resume(struct device *device) resume_exit: netdev_unlock(bp->dev); + rtnl_unlock(); bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); @@ -16931,10 +16954,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) if (!err) result = PCI_ERS_RESULT_RECOVERED; + /* IRQ will be initialized later in bnxt_io_resume */ bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); - err = bnxt_init_int_mode(bp); - bnxt_ulp_irq_restart(bp, err); } reset_exit: @@ -16959,20 +16981,25 @@ static void bnxt_io_resume(struct pci_dev *pdev) int err; netdev_info(bp->dev, "PCI Slot Resume\n"); + rtnl_lock(); netdev_lock(netdev); err = bnxt_hwrm_func_qcaps(bp); if (!err) { - if (netif_running(netdev)) + if (netif_running(netdev)) { err = bnxt_open(netdev); - else + } else { err = bnxt_reserve_rings(bp, true); + if (!err) + err = bnxt_init_int_mode(bp); + } } if (!err) netif_device_attach(netdev); netdev_unlock(netdev); + rtnl_unlock(); bnxt_ulp_start(bp, err); if (!err) bnxt_reenable_sriov(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 21726cf565866d..bc8b3b7e915d3b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ 
b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2614,6 +2614,7 @@ struct bnxt { #define BNXT_FW_RESET_STATE_POLL_FW 4 #define BNXT_FW_RESET_STATE_OPENING 5 #define BNXT_FW_RESET_STATE_POLL_FW_DOWN 6 +#define BNXT_FW_RESET_STATE_ABORT 7 u16 fw_reset_min_dsecs; #define BNXT_DFLT_FW_RST_MIN_DSECS 20 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 5576e7cf846317..a000d3f630bd3b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -110,20 +110,30 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, } } + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) + info->dest_buf_size += len; + if (info->dest_buf) { if ((info->seg_start + off + len) <= BNXT_COREDUMP_BUF_LEN(info->buf_len)) { - memcpy(info->dest_buf + off, dma_buf, len); + u16 copylen = min_t(u16, len, + info->dest_buf_size - off); + + memcpy(info->dest_buf + off, dma_buf, copylen); + if (copylen < len) + break; } else { rc = -ENOBUFS; + if (cmn_req->req_type == + cpu_to_le16(HWRM_DBG_COREDUMP_LIST)) { + kfree(info->dest_buf); + info->dest_buf = NULL; + } break; } } - if (cmn_req->req_type == - cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE)) - info->dest_buf_size += len; - if (!(cmn_resp->flags & HWRM_DBG_CMN_FLAGS_MORE)) break; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 48dd5922e4dd8b..f5d490bf997e34 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2062,6 +2062,17 @@ static int bnxt_get_regs_len(struct net_device *dev) return reg_len; } +#define BNXT_PCIE_32B_ENTRY(start, end) \ + { offsetof(struct pcie_ctx_hw_stats, start), \ + offsetof(struct pcie_ctx_hw_stats, end) } + +static const struct { + u16 start; + u16 end; +} bnxt_pcie_32b_entries[] = { + BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], 
pcie_ltssm_histogram[3]), +}; + static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { @@ -2094,12 +2105,27 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); rc = hwrm_req_send(bp, req); if (!rc) { - __le64 *src = (__le64 *)hw_pcie_stats; - u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN); - int i; - - for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++) - dst[i] = le64_to_cpu(src[i]); + u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); + u8 *src = (u8 *)hw_pcie_stats; + int i, j; + + for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + if (i >= bnxt_pcie_32b_entries[j].start && + i <= bnxt_pcie_32b_entries[j].end) { + u32 *dst32 = (u32 *)(dst + i); + + *dst32 = le32_to_cpu(*(__le32 *)(src + i)); + i += 4; + if (i > bnxt_pcie_32b_entries[j].end && + j < ARRAY_SIZE(bnxt_pcie_32b_entries) - 1) + j++; + } else { + u64 *dst64 = (u64 *)(dst + i); + + *dst64 = le64_to_cpu(*(__le64 *)(src + i)); + i += 8; + } + } } hwrm_req_drop(bp, req); } @@ -4991,6 +5017,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, if (!bp->num_tests || !BNXT_PF(bp)) return; + memset(buf, 0, sizeof(u64) * bp->num_tests); if (etest->flags & ETH_TEST_FL_OFFLINE && bnxt_ulp_registered(bp->edev)) { etest->flags |= ETH_TEST_FL_FAILED; @@ -4998,7 +5025,6 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, return; } - memset(buf, 0, sizeof(u64) * bp->num_tests); if (!netif_running(dev)) { etest->flags |= ETH_TEST_FL_FAILED; return; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 2d4e19b96ee744..0669d43472f51b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -794,6 +794,27 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) return HZ; } +void bnxt_ptp_free_txts_skbs(struct 
bnxt_ptp_cfg *ptp) +{ + struct bnxt_ptp_tx_req *txts_req; + u16 cons = ptp->txts_cons; + + /* make sure ptp aux worker finished with + * possible BNXT_STATE_OPEN set + */ + ptp_cancel_worker_sync(ptp->ptp_clock); + + ptp->tx_avail = BNXT_MAX_TX_TS; + while (cons != ptp->txts_prod) { + txts_req = &ptp->txts_req[cons]; + if (!IS_ERR_OR_NULL(txts_req->tx_skb)) + dev_kfree_skb_any(txts_req->tx_skb); + cons = NEXT_TXTS(cons); + } + ptp->txts_cons = cons; + ptp_schedule_worker(ptp->ptp_clock, 0); +} + int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod) { spin_lock_bh(&ptp->ptp_tx_lock); @@ -1105,7 +1126,6 @@ int bnxt_ptp_init(struct bnxt *bp) void bnxt_ptp_clear(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - int i; if (!ptp) return; @@ -1117,12 +1137,5 @@ void bnxt_ptp_clear(struct bnxt *bp) kfree(ptp->ptp_info.pin_config); ptp->ptp_info.pin_config = NULL; - for (i = 0; i < BNXT_MAX_TX_TS; i++) { - if (ptp->txts_req[i].tx_skb) { - dev_kfree_skb_any(ptp->txts_req[i].tx_skb); - ptp->txts_req[i].tx_skb = NULL; - } - } - bnxt_unmap_ptp_regs(bp); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index a95f05e9c579b7..0481161d26ef5d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -162,6 +162,7 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); +void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp); int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index a8e930d5dbb09f..7564705d64783e 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -309,14 +310,12 @@ void bnxt_ulp_irq_stop(struct bnxt *bp) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_stop) return; if (test_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) reset = true; ops->ulp_irq_stop(ulp->handle, reset); - netdev_unlock(bp->dev); } } @@ -335,8 +334,7 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) if (!ulp->msix_requested) return; - netdev_lock(bp->dev); - ops = rcu_dereference(ulp->ulp_ops); + ops = netdev_lock_dereference(ulp->ulp_ops, bp->dev); if (!ops || !ops->ulp_irq_restart) return; @@ -348,7 +346,6 @@ void bnxt_ulp_irq_restart(struct bnxt *bp, int err) bnxt_fill_msix_vecs(bp, ent); } ops->ulp_irq_restart(ulp->handle, ent); - netdev_unlock(bp->dev); kfree(ent); } } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1fe8ec37491b19..e1e8bd2ec155b8 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -997,22 +997,15 @@ static void macb_update_stats(struct macb *bp) static int macb_halt_tx(struct macb *bp) { - unsigned long halt_time, timeout; - u32 status; + u32 status; macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); - timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); - do { - halt_time = jiffies; - status = macb_readl(bp, TSR); - if (!(status & MACB_BIT(TGO))) - return 0; - - udelay(250); - } while (time_before(halt_time, timeout)); - - return -ETIMEDOUT; + /* Poll TSR until TGO is cleared or timeout. 
*/ + return read_poll_timeout_atomic(macb_readl, status, + !(status & MACB_BIT(TGO)), + 250, MACB_HALT_TIMEOUT, false, + bp, TSR); } static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb, int budget) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index ca742cc146d798..7dae5aad3689d6 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -70,8 +70,8 @@ config LIQUIDIO depends on 64BIT && PCI depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select CRC32 select FW_LOADER - select LIBCRC32C select LIQUIDIO_CORE select NET_DEVLINK help diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 7f3f5afa864f4a..1546c3db08f093 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -2270,6 +2270,7 @@ int cxgb4_init_ethtool_filters(struct adapter *adap) eth_filter->port[i].bmap = bitmap_zalloc(nentries, GFP_KERNEL); if (!eth_filter->port[i].bmap) { ret = -ENOMEM; + kvfree(eth_filter->port[i].loc_array); goto free_eth_finfo; } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 551c279dc14bed..51395c96b2e994 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6480,10 +6480,11 @@ static const struct tlsdev_ops cxgb4_ktls_ops = { #if IS_ENABLED(CONFIG_CHELSIO_IPSEC_INLINE) -static int cxgb4_xfrm_add_state(struct xfrm_state *x, +static int cxgb4_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); int ret; if (!mutex_trylock(&uld_mutex)) { @@ -6494,7 +6495,8 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x, if (ret) goto out_unlock; - ret = 
adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(x, extack); + ret = adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_add(dev, x, + extack); out_unlock: mutex_unlock(&uld_mutex); @@ -6502,9 +6504,9 @@ static int cxgb4_xfrm_add_state(struct xfrm_state *x, return ret; } -static void cxgb4_xfrm_del_state(struct xfrm_state *x) +static void cxgb4_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); if (!mutex_trylock(&uld_mutex)) { dev_dbg(adap->pdev_dev, @@ -6514,15 +6516,15 @@ static void cxgb4_xfrm_del_state(struct xfrm_state *x) if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) goto out_unlock; - adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(x); + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_delete(dev, x); out_unlock: mutex_unlock(&uld_mutex); } -static void cxgb4_xfrm_free_state(struct xfrm_state *x) +static void cxgb4_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { - struct adapter *adap = netdev2adap(x->xso.dev); + struct adapter *adap = netdev2adap(dev); if (!mutex_trylock(&uld_mutex)) { dev_dbg(adap->pdev_dev, @@ -6532,7 +6534,7 @@ static void cxgb4_xfrm_free_state(struct xfrm_state *x) if (chcr_offload_state(adap, CXGB4_XFRMDEV_OPS)) goto out_unlock; - adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(x); + adap->uld[CXGB4_ULD_IPSEC].xfrmdev_ops->xdo_dev_state_free(dev, x); out_unlock: mutex_unlock(&uld_mutex); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index baba96883f48b5..ecd9a0bd5e1822 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -75,9 +75,12 @@ static int ch_ipsec_uld_state_change(void *handle, enum cxgb4_state new_state); static int ch_ipsec_xmit(struct sk_buff *skb, struct net_device 
*dev); static void *ch_ipsec_uld_add(const struct cxgb4_lld_info *infop); static void ch_ipsec_advance_esn_state(struct xfrm_state *x); -static void ch_ipsec_xfrm_free_state(struct xfrm_state *x); -static void ch_ipsec_xfrm_del_state(struct xfrm_state *x); -static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, +static void ch_ipsec_xfrm_free_state(struct net_device *dev, + struct xfrm_state *x); +static void ch_ipsec_xfrm_del_state(struct net_device *dev, + struct xfrm_state *x); +static int ch_ipsec_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack); static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = { @@ -223,7 +226,8 @@ static int ch_ipsec_setkey(struct xfrm_state *x, * returns 0 on success, negative error if failed to send message to FPGA * positive error if FPGA returned a bad response */ -static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, +static int ch_ipsec_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ipsec_sa_entry *sa_entry; @@ -302,14 +306,16 @@ static int ch_ipsec_xfrm_add_state(struct xfrm_state *x, return res; } -static void ch_ipsec_xfrm_del_state(struct xfrm_state *x) +static void ch_ipsec_xfrm_del_state(struct net_device *dev, + struct xfrm_state *x) { /* do nothing */ if (!x->xso.offload_handle) return; } -static void ch_ipsec_xfrm_free_state(struct xfrm_state *x) +static void ch_ipsec_xfrm_free_state(struct net_device *dev, + struct xfrm_state *x) { struct ipsec_sa_entry *sa_entry; diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index d88fbecdab4b89..232e839a9d0719 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -352,7 +352,7 @@ parse_eeprom (struct net_device *dev) eth_hw_addr_set(dev, psrom->mac_addr); if (np->chip_id == CHIP_IP1000A) { - np->led_mode = psrom->led_mode; + np->led_mode = le16_to_cpu(psrom->led_mode); return 0; } diff --git 
a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h index 195dc6cfd8955c..0e33e2eaae9606 100644 --- a/drivers/net/ethernet/dlink/dl2k.h +++ b/drivers/net/ethernet/dlink/dl2k.h @@ -335,7 +335,7 @@ typedef struct t_SROM { u16 sub_system_id; /* 0x06 */ u16 pci_base_1; /* 0x08 (IP1000A only) */ u16 pci_base_2; /* 0x0a (IP1000A only) */ - u16 led_mode; /* 0x0c (IP1000A only) */ + __le16 led_mode; /* 0x0c (IP1000A only) */ u16 reserved1[9]; /* 0x0e-0x1f */ u8 mac_addr[6]; /* 0x20-0x25 */ u8 reserved2[10]; /* 0x26-0x2f */ diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 625245b0845c7a..eba73246f98664 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -67,6 +67,8 @@ #define TSNEP_TX_TYPE_XDP_NDO_MAP_PAGE (TSNEP_TX_TYPE_XDP_NDO | TSNEP_TX_TYPE_MAP_PAGE) #define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO) #define TSNEP_TX_TYPE_XSK BIT(12) +#define TSNEP_TX_TYPE_TSTAMP BIT(13) +#define TSNEP_TX_TYPE_SKB_TSTAMP (TSNEP_TX_TYPE_SKB | TSNEP_TX_TYPE_TSTAMP) #define TSNEP_XDP_TX BIT(0) #define TSNEP_XDP_REDIRECT BIT(1) @@ -386,8 +388,7 @@ static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length, if (entry->skb) { entry->properties = length & TSNEP_DESC_LENGTH_MASK; entry->properties |= TSNEP_DESC_INTERRUPT_FLAG; - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS)) + if ((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) entry->properties |= TSNEP_DESC_EXTENDED_WRITEBACK_FLAG; /* toggle user flag to prevent false acknowledge @@ -479,7 +480,8 @@ static int tsnep_tx_map_frag(skb_frag_t *frag, struct tsnep_tx_entry *entry, return mapped; } -static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) +static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count, + bool do_tstamp) { struct device *dmadev = 
tx->adapter->dmadev; struct tsnep_tx_entry *entry; @@ -505,6 +507,9 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count) entry->type = TSNEP_TX_TYPE_SKB_INLINE; mapped = 0; } + + if (do_tstamp) + entry->type |= TSNEP_TX_TYPE_TSTAMP; } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; @@ -558,11 +563,12 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count) static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, struct tsnep_tx *tx) { - int count = 1; struct tsnep_tx_entry *entry; + bool do_tstamp = false; + int count = 1; int length; - int i; int retval; + int i; if (skb_shinfo(skb)->nr_frags > 0) count += skb_shinfo(skb)->nr_frags; @@ -579,7 +585,13 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, entry = &tx->entry[tx->write]; entry->skb = skb; - retval = tsnep_tx_map(skb, tx, count); + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + tx->adapter->hwtstamp_config.tx_type == HWTSTAMP_TX_ON) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + do_tstamp = true; + } + + retval = tsnep_tx_map(skb, tx, count, do_tstamp); if (retval < 0) { tsnep_tx_unmap(tx, tx->write, count); dev_kfree_skb_any(entry->skb); @@ -591,9 +603,6 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, } length = retval; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - for (i = 0; i < count; i++) tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length, i == count - 1); @@ -844,8 +853,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) length = tsnep_tx_unmap(tx, tx->read, count); - if ((entry->type & TSNEP_TX_TYPE_SKB) && - (skb_shinfo(entry->skb)->tx_flags & SKBTX_IN_PROGRESS) && + if (((entry->type & TSNEP_TX_TYPE_SKB_TSTAMP) == TSNEP_TX_TYPE_SKB_TSTAMP) && (__le32_to_cpu(entry->desc_wb->properties) & TSNEP_DESC_EXTENDED_WRITEBACK_FLAG)) { struct skb_shared_hwtstamps hwtstamps; diff --git 
a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 2106861463e40f..3ee52f4b11660a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1850,6 +1850,16 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, } } +static void enetc_bulk_flip_buff(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } +} + static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, struct napi_struct *napi, int work_limit, struct bpf_prog *prog) @@ -1868,11 +1878,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd, *orig_rxbd; - int orig_i, orig_cleaned_cnt; struct xdp_buff xdp_buff; struct sk_buff *skb; + int orig_i, err; u32 bd_status; - int err; rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); @@ -1887,7 +1896,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, break; orig_rxbd = rxbd; - orig_cleaned_cnt = cleaned_cnt; orig_i = i; enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, @@ -1915,15 +1923,21 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, rx_ring->stats.xdp_drops++; break; case XDP_PASS: - rxbd = orig_rxbd; - cleaned_cnt = orig_cleaned_cnt; - i = orig_i; - - skb = enetc_build_skb(rx_ring, bd_status, &rxbd, - &i, &cleaned_cnt, - ENETC_RXB_DMA_SIZE_XDP); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&xdp_buff); + /* Probably under memory pressure, stop NAPI */ + if (unlikely(!skb)) { + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_drops++; goto out; + } + + enetc_get_offloads(rx_ring, orig_rxbd, skb); + + /* These buffers are about to be owned by the stack. 
+ * Update our buffer cache (the rx_swbd array elements) + * with their other page halves. + */ + enetc_bulk_flip_buff(rx_ring, orig_i, i); napi_gro_receive(napi, skb); break; @@ -1965,11 +1979,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; } else { - while (orig_i != i) { - enetc_flip_rx_buff(rx_ring, - &rx_ring->rx_swbd[orig_i]); - enetc_bdr_idx_inc(rx_ring, &orig_i); - } + enetc_bulk_flip_buff(rx_ring, orig_i, i); xdp_redirect_frm_cnt++; rx_ring->stats.xdp_redirect++; } @@ -3362,7 +3372,8 @@ static int enetc_int_vector_init(struct enetc_ndev_priv *priv, int i, bdr->buffer_offset = ENETC_RXB_PAD; priv->rx_ring[i] = bdr; - err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); + err = __xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0, + ENETC_RXB_DMA_SIZE_XDP); if (err) goto free_vector; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index a86cfebedaa8b5..17e9bddb9ddd58 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -714,7 +714,12 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, txq->bd.cur = bdp; /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); + if (!(fep->quirks & FEC_QUIRK_ERR007885) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active) || + !readl(txq->bd.reg_desc_active)) + writel(0, txq->bd.reg_desc_active); return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index c3791cf23c876c..d561d45021a581 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2153,7 +2153,7 @@ void gve_handle_report_stats(struct gve_priv *priv) }; stats[stats_idx++] = (struct stats) { .stat_name = cpu_to_be32(RX_BUFFERS_POSTED), - .value = 
cpu_to_be64(priv->rx[0].fill_cnt), + .value = cpu_to_be64(priv->rx[idx].fill_cnt), .queue_id = cpu_to_be32(idx), }; } diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 2eba868d80370a..f7da7de23d6726 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -763,6 +763,9 @@ static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx, s16 completion_tag; pkt = gve_alloc_pending_packet(tx); + if (!pkt) + return -ENOMEM; + pkt->skb = skb; completion_tag = pkt - tx->dqo.pending_packets; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index f8cdab62bf85ca..7725cb0c5c8a44 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -108,14 +108,16 @@ struct hbg_irq_info { bool re_enable; bool need_print; bool need_reset; - u64 count; - void (*irq_handle)(struct hbg_priv *priv, struct hbg_irq_info *info); + void (*irq_handle)(struct hbg_priv *priv, + const struct hbg_irq_info *info); }; struct hbg_vector { char name[HBG_VECTOR_NUM][32]; - struct hbg_irq_info *info_array; + + u64 *stats_array; + const struct hbg_irq_info *info_array; u32 info_array_len; }; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 5e0ba4d5b08d28..01ad82d2f5cc7f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -61,7 +61,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); - struct hbg_irq_info *info; + const struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { @@ -73,7 +73,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) info->mask)), 
str_true_false(info->need_reset), str_true_false(info->need_print), - info->count); + priv->vectors.stats_array[i]); } return 0; @@ -106,6 +106,7 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); + bool np_link_fail; seq_printf(s, "event handling state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_EVENT_HANDLING)); @@ -117,8 +118,10 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) reset_type_str[priv->reset_type]); seq_printf(s, "need reset state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_NEED_RESET)); - seq_printf(s, "np_link fail state: %s\n", - state_str_true_false(priv, HBG_NIC_STATE_NP_LINK_FAIL)); + + np_link_fail = !hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, + HBG_REG_AN_NEG_STATE_NP_LINK_OK_B); + seq_printf(s, "np_link fail state: %s\n", str_true_false(np_link_fail)); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c index d61c03f34ff057..f23fb5920c3cca 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c @@ -234,7 +234,7 @@ static u64 hbg_get_irq_stats(struct hbg_vector *vectors, u32 mask) for (i = 0; i < vectors->info_array_len; i++) if (vectors->info_array[i].mask == mask) - return vectors->info_array[i].count; + return vectors->stats_array[i]; return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 4e8cb66f601c0e..ff3295b60a69a8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -26,12 +26,15 @@ static void hbg_restore_mac_table(struct hbg_priv *priv) static void hbg_restore_user_def_settings(struct hbg_priv *priv) { + /* The index of host mac is always 0. 
*/ + u64 rx_pause_addr = ether_addr_to_u64(priv->filter.mac_table[0].addr); struct ethtool_pauseparam *pause_param = &priv->user_def.pause_param; hbg_restore_mac_table(priv); hbg_hw_set_mtu(priv, priv->netdev->mtu); hbg_hw_set_pause_enable(priv, pause_param->tx_pause, pause_param->rx_pause); + hbg_hw_set_rx_pause_mac_addr(priv, rx_pause_addr); } int hbg_rebuild(struct hbg_priv *priv) @@ -58,6 +61,8 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) return -EBUSY; } + netif_device_detach(priv->netdev); + priv->reset_type = type; set_bit(HBG_NIC_STATE_RESETTING, &priv->state); clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); @@ -88,6 +93,8 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) return ret; } + netif_device_attach(priv->netdev); + dev_info(&priv->pdev->dev, "reset done\n"); return ret; } @@ -114,16 +121,13 @@ void hbg_err_reset(struct hbg_priv *priv) if (running) dev_close(priv->netdev); - hbg_reset(priv); - - /* in hbg_pci_err_detected(), we will detach first, - * so we need to attach before open - */ - if (!netif_device_present(priv->netdev)) - netif_device_attach(priv->netdev); + if (hbg_reset(priv)) + goto err_unlock; if (running) dev_open(priv->netdev, NULL); + +err_unlock: rtnl_unlock(); } @@ -157,7 +161,6 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); hbg_err_reset(priv); - netif_device_attach(netdev); return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 8f1107b85fbb09..55520053270a5c 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -317,6 +317,9 @@ static void hbg_update_stats_by_info(struct hbg_priv *priv, const struct hbg_ethtool_stats *stats; u32 i; + if (test_bit(HBG_NIC_STATE_RESETTING, &priv->state)) + return; + for (i = 0; i < info_len; i++) { stats = 
&info[i]; if (!stats->reg) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 74a18033b44412..9b65eef62b3fba 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -234,6 +234,10 @@ void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable) { hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B, enable); + + /* only uc filter is supported, so set all bits of mc mask reg to 1 */ + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_0, U64_MAX); + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_1, U64_MAX); } void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) @@ -242,6 +246,9 @@ void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) HBG_REG_PAUSE_ENABLE_TX_B, tx_en); hbg_reg_write_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, HBG_REG_PAUSE_ENABLE_RX_B, rx_en); + + hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, + HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B, rx_en); } void hbg_hw_get_pause_enable(struct hbg_priv *priv, u32 *tx_en, u32 *rx_en) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index e79e9ab3e5308e..8af0bc4cca2166 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -6,7 +6,7 @@ #include "hbg_hw.h" static void hbg_irq_handle_err(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { if (irq_info->need_print) dev_err(&priv->pdev->dev, @@ -17,30 +17,30 @@ static void hbg_irq_handle_err(struct hbg_priv *priv, } static void hbg_irq_handle_tx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { napi_schedule(&priv->tx_ring.napi); } static void hbg_irq_handle_rx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct 
hbg_irq_info *irq_info) { napi_schedule(&priv->rx_ring.napi); } static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; } #define HBG_IRQ_I(name, handle) \ - {#name, HBG_INT_MSK_##name##_B, false, false, false, 0, handle} + {#name, HBG_INT_MSK_##name##_B, false, false, false, handle} #define HBG_ERR_IRQ_I(name, need_print, ndde_reset) \ {#name, HBG_INT_MSK_##name##_B, true, need_print, \ - ndde_reset, 0, hbg_irq_handle_err} + ndde_reset, hbg_irq_handle_err} -static struct hbg_irq_info hbg_irqs[] = { +static const struct hbg_irq_info hbg_irqs[] = { HBG_IRQ_I(RX, hbg_irq_handle_rx), HBG_IRQ_I(TX, hbg_irq_handle_tx), HBG_ERR_IRQ_I(TX_PKT_CPL, true, true), @@ -64,7 +64,7 @@ static struct hbg_irq_info hbg_irqs[] = { static irqreturn_t hbg_irq_handle(int irq_num, void *p) { - struct hbg_irq_info *info; + const struct hbg_irq_info *info; struct hbg_priv *priv = p; u32 status; u32 i; @@ -79,7 +79,7 @@ static irqreturn_t hbg_irq_handle(int irq_num, void *p) hbg_hw_irq_enable(priv, info->mask, false); hbg_hw_irq_clear(priv, info->mask); - info->count++; + priv->vectors.stats_array[i]++; if (info->irq_handle) info->irq_handle(priv, info); @@ -132,6 +132,12 @@ int hbg_irq_init(struct hbg_priv *priv) irq_names_map[i]); } + vectors->stats_array = devm_kcalloc(&priv->pdev->dev, + ARRAY_SIZE(hbg_irqs), + sizeof(u64), GFP_KERNEL); + if (!vectors->stats_array) + return -ENOMEM; + vectors->info_array = hbg_irqs; vectors->info_array_len = ARRAY_SIZE(hbg_irqs); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 2ac5454338e4de..2e64dc1ab355e3 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -21,7 +21,7 @@ static void hbg_all_irq_enable(struct hbg_priv *priv, bool enabled) { - struct hbg_irq_info *info; + const 
struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { @@ -203,12 +203,12 @@ static int hbg_net_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) return -EBUSY; - hbg_hw_set_mtu(priv, new_mtu); - WRITE_ONCE(netdev->mtu, new_mtu); - dev_dbg(&priv->pdev->dev, "change mtu from %u to %u\n", netdev->mtu, new_mtu); + hbg_hw_set_mtu(priv, new_mtu); + WRITE_ONCE(netdev->mtu, new_mtu); + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index f29a937ad0879f..42b0083c9193f2 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Hisilicon Limited. #include +#include #include "hbg_common.h" #include "hbg_hw.h" #include "hbg_mdio.h" @@ -133,12 +134,17 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) { struct device *dev = &priv->pdev->dev; + rtnl_lock(); + if (priv->stats.np_link_fail_cnt >= HBG_NP_LINK_FAIL_RETRY_TIMES) { dev_err(dev, "failed to fix the MAC link status\n"); priv->stats.np_link_fail_cnt = 0; - return; + goto unlock; } + if (!priv->mac.phydev->link) + goto unlock; + priv->stats.np_link_fail_cnt++; dev_err(dev, "failed to link between MAC and PHY, try to fix...\n"); @@ -147,6 +153,9 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) */ hbg_phy_stop(priv); hbg_phy_start(priv); + +unlock: + rtnl_unlock(); } static void hbg_phy_adjust_link(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index cc2cc612770d7c..a6e7f5e62b48aa 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -68,6 +68,7 @@ #define HBG_REG_TRANSMIT_CTRL_AN_EN_B BIT(5) #define HBG_REG_REC_FILT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0064) #define HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B BIT(0) +#define 
HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B BIT(4) #define HBG_REG_RX_OCTETS_TOTAL_OK_ADDR (HBG_REG_SGMII_BASE + 0x0080) #define HBG_REG_RX_OCTETS_BAD_ADDR (HBG_REG_SGMII_BASE + 0x0084) #define HBG_REG_RX_UC_PKTS_ADDR (HBG_REG_SGMII_BASE + 0x0088) @@ -134,6 +135,8 @@ #define HBG_REG_STATION_ADDR_HIGH_4_ADDR (HBG_REG_SGMII_BASE + 0x0224) #define HBG_REG_STATION_ADDR_LOW_5_ADDR (HBG_REG_SGMII_BASE + 0x0228) #define HBG_REG_STATION_ADDR_HIGH_5_ADDR (HBG_REG_SGMII_BASE + 0x022C) +#define HBG_REG_STATION_ADDR_LOW_MSK_0 (HBG_REG_SGMII_BASE + 0x0230) +#define HBG_REG_STATION_ADDR_LOW_MSK_1 (HBG_REG_SGMII_BASE + 0x0238) /* PCU */ #define HBG_REG_TX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0420) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 09749e9f739868..4e5d8bc39a1bf3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -61,7 +61,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tm_qset", .cmd = HNAE3_DBG_CMD_TM_QSET, .dentry = HNS3_DBG_DENTRY_TM, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 9ff797fb36c456..b03b8758c7774e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -473,20 +473,14 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector, writel(mask_en, tqp_vector->mask_addr); } -static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector) +static void hns3_irq_enable(struct hns3_enet_tqp_vector *tqp_vector) { napi_enable(&tqp_vector->napi); enable_irq(tqp_vector->vector_irq); - - /* enable vector */ - hns3_mask_vector_irq(tqp_vector, 1); } -static void hns3_vector_disable(struct hns3_enet_tqp_vector *tqp_vector) +static void 
hns3_irq_disable(struct hns3_enet_tqp_vector *tqp_vector) { - /* disable vector */ - hns3_mask_vector_irq(tqp_vector, 0); - disable_irq(tqp_vector->vector_irq); napi_disable(&tqp_vector->napi); cancel_work_sync(&tqp_vector->rx_group.dim.work); @@ -707,11 +701,42 @@ static int hns3_set_rx_cpu_rmap(struct net_device *netdev) return 0; } +static void hns3_enable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_enable(&priv->tqp_vector[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 1); + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_enable(h->kinfo.tqp[i]); +} + +static void hns3_disable_irqs_and_tqps(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + u16 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) + hns3_tqp_disable(h->kinfo.tqp[i]); + + for (i = 0; i < priv->vector_num; i++) + hns3_mask_vector_irq(&priv->tqp_vector[i], 0); + + for (i = 0; i < priv->vector_num; i++) + hns3_irq_disable(&priv->tqp_vector[i]); +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; - int i, j; int ret; ret = hns3_nic_reset_all_ring(h); @@ -720,23 +745,13 @@ static int hns3_nic_net_up(struct net_device *netdev) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - /* enable the vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_enable(&priv->tqp_vector[i]); - - /* enable rcb */ - for (j = 0; j < h->kinfo.num_tqps; j++) - hns3_tqp_enable(h->kinfo.tqp[j]); + hns3_enable_irqs_and_tqps(netdev); /* start the ae_dev */ ret = h->ae_algo->ops->start ? 
h->ae_algo->ops->start(h) : 0; if (ret) { set_bit(HNS3_NIC_STATE_DOWN, &priv->state); - while (j--) - hns3_tqp_disable(h->kinfo.tqp[j]); - - for (j = i - 1; j >= 0; j--) - hns3_vector_disable(&priv->tqp_vector[j]); + hns3_disable_irqs_and_tqps(netdev); } return ret; @@ -823,17 +838,9 @@ static void hns3_reset_tx_queue(struct hnae3_handle *h) static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops; - int i; - /* disable vectors */ - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - /* disable rcb */ - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(netdev); /* stop ae_dev */ ops = priv->ae_handle->ae_algo->ops; @@ -5864,8 +5871,6 @@ int hns3_set_channels(struct net_device *netdev, void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5876,11 +5881,7 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running) netif_carrier_off(ndev); netif_tx_disable(ndev); - for (i = 0; i < priv->vector_num; i++) - hns3_vector_disable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_disable(h->kinfo.tqp[i]); + hns3_disable_irqs_and_tqps(ndev); /* delay ring buffer clearing to hns3_reset_notify_uninit_enet * during reset process, because driver may not be able @@ -5896,7 +5897,6 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) { struct hns3_nic_priv *priv = netdev_priv(ndev); struct hnae3_handle *h = priv->ae_handle; - int i; if (!if_running) return; @@ -5912,11 +5912,7 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running) clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); - for (i = 0; i < priv->vector_num; i++) - 
hns3_vector_enable(&priv->tqp_vector[i]); - - for (i = 0; i < h->kinfo.num_tqps; i++) - hns3_tqp_enable(h->kinfo.tqp[i]); + hns3_enable_irqs_and_tqps(ndev); netif_tx_wake_all_queues(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 59cc9221185f28..ec581d4b696f59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -440,6 +440,13 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) ptp->info.settime64 = hclge_ptp_settime; ptp->info.n_alarm = 0; + + spin_lock_init(&ptp->lock); + ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; + ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; + hdev->ptp = ptp; + ptp->clock = ptp_clock_register(&ptp->info, &hdev->pdev->dev); if (IS_ERR(ptp->clock)) { dev_err(&hdev->pdev->dev, @@ -451,12 +458,6 @@ static int hclge_ptp_create_clock(struct hclge_dev *hdev) return -ENODEV; } - spin_lock_init(&ptp->lock); - ptp->io_base = hdev->hw.hw.io_base + HCLGE_PTP_REG_OFFSET; - ptp->ts_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - ptp->ts_cfg.tx_type = HWTSTAMP_TX_OFF; - hdev->ptp = ptp; - return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 9ba767740a043f..dada42e7e0ec96 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1292,9 +1292,8 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) rtnl_unlock(); } -static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +static int hclgevf_en_hw_strip_rxvtag_cmd(struct hclgevf_dev *hdev, bool enable) { - struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN, @@ -1303,6 +1302,19 @@ static 
int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } +static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + + ret = hclgevf_en_hw_strip_rxvtag_cmd(hdev, enable); + if (ret) + return ret; + + hdev->rxvtag_strip_en = enable; + return 0; +} + static int hclgevf_reset_tqp(struct hnae3_handle *handle) { #define HCLGEVF_RESET_ALL_QUEUE_DONE 1U @@ -2204,12 +2216,13 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) tc_valid, tc_size); } -static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) +static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev, + bool rxvtag_strip_en) { struct hnae3_handle *nic = &hdev->nic; int ret; - ret = hclgevf_en_hw_strip_rxvtag(nic, true); + ret = hclgevf_en_hw_strip_rxvtag(nic, rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed to enable rx vlan offload, ret = %d\n", ret); @@ -2879,7 +2892,7 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, hdev->rxvtag_strip_en); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); @@ -2994,7 +3007,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; } - ret = hclgevf_init_vlan_config(hdev); + ret = hclgevf_init_vlan_config(hdev, true); if (ret) { dev_err(&hdev->pdev->dev, "failed(%d) to initialize VLAN config\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cccef32284616b..0208425ab594f5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -253,6 +253,7 @@ struct hclgevf_dev { int *vector_irq; bool gro_en; + bool rxvtag_strip_en; unsigned long 
vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f089c3d47b23b..d8595e84326dbc 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); - - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); + e1000_phy_hw_reset(hw); kfree(adapter->tx_ring); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb6703..88e6bef69342c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is in reset, resets successfully, or resets - * are disabled and false otherwise. + * Return: True if reset was performed successfully or if resets are disabled. + * False if reset is already in progress. **/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) /* If VF is being reset already we don't need to continue. 
*/ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - return true; + return false; i40e_trigger_vf_reset(vf, flr); @@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - i40e_reset_vf(vf, true); + if (!i40e_reset_vf(vf, true)) { + /* At least one VF did not finish resetting, retry next time */ + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } return 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 9de3e0ba37316c..f7a98ff43a57fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -268,7 +268,6 @@ struct iavf_adapter { struct list_head vlan_filter_list; int num_vlan_filters; struct list_head mac_filter_list; - struct mutex crit_lock; /* Lock to protect accesses to MAC and VLAN lists */ spinlock_t mac_vlan_list_lock; char misc_vector_name[IFNAMSIZ + 9]; diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 288bb5b2e72ef7..2b2b315205b5e0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -4,6 +4,8 @@ #include #include +#include + /* ethtool support for iavf */ #include "iavf.h" @@ -1256,9 +1258,10 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx { struct ethtool_rx_flow_spec *fsp = &cmd->fs; struct iavf_fdir_fltr *fltr; - int count = 50; int err; + netdev_assert_locked(adapter->netdev); + if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED)) return -EOPNOTSUPP; @@ -1277,14 +1280,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (!fltr) return -ENOMEM; - while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) { - kfree(fltr); - return -EINVAL; - } - udelay(1); - } - err = iavf_add_fdir_fltr_info(adapter, 
fsp, fltr); if (!err) err = iavf_fdir_add_fltr(adapter, fltr); @@ -1292,7 +1287,6 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (err) kfree(fltr); - mutex_unlock(&adapter->crit_lock); return err; } @@ -1435,11 +1429,13 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, { struct iavf_adv_rss *rss_old, *rss_new; bool rss_new_add = false; - int count = 50, err = 0; bool symm = false; u64 hash_flds; + int err = 0; u32 hdrs; + netdev_assert_locked(adapter->netdev); + if (!ADV_RSS_SUPPORT(adapter)) return -EOPNOTSUPP; @@ -1463,15 +1459,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, return -EINVAL; } - while (!mutex_trylock(&adapter->crit_lock)) { - if (--count == 0) { - kfree(rss_new); - return -EINVAL; - } - - udelay(1); - } - spin_lock_bh(&adapter->adv_rss_lock); rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs); if (rss_old) { @@ -1500,8 +1487,6 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, if (!err) iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_ADV_RSS_CFG); - mutex_unlock(&adapter->crit_lock); - if (!rss_new_add) kfree(rss_new); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 6d7ba4d67a1933..81d7249d1149c8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1287,11 +1287,11 @@ static void iavf_configure(struct iavf_adapter *adapter) /** * iavf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure - * - * Expects to be called while holding crit_lock. 
- **/ + */ static void iavf_up_complete(struct iavf_adapter *adapter) { + netdev_assert_locked(adapter->netdev); + iavf_change_state(adapter, __IAVF_RUNNING); clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state); @@ -1410,13 +1410,13 @@ static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter) /** * iavf_down - Shutdown the connection processing * @adapter: board private structure - * - * Expects to be called while holding crit_lock. - **/ + */ void iavf_down(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + netdev_assert_locked(netdev); + if (adapter->state <= __IAVF_DOWN_PENDING) return; @@ -2025,22 +2025,21 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni * iavf_finish_config - do all netdev work that needs RTNL * @work: our work_struct * - * Do work that needs both RTNL and crit_lock. - **/ + * Do work that needs RTNL. + */ static void iavf_finish_config(struct work_struct *work) { struct iavf_adapter *adapter; - bool locks_released = false; + bool netdev_released = false; int pairs, err; adapter = container_of(work, struct iavf_adapter, finish_config); /* Always take RTNL first to prevent circular lock dependency; - * The dev->lock is needed to update the queue number + * the dev->lock (== netdev lock) is needed to update the queue number. 
*/ rtnl_lock(); netdev_lock(adapter->netdev); - mutex_lock(&adapter->crit_lock); if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && adapter->netdev->reg_state == NETREG_REGISTERED && @@ -2059,22 +2058,21 @@ static void iavf_finish_config(struct work_struct *work) netif_set_real_num_tx_queues(adapter->netdev, pairs); if (adapter->netdev->reg_state != NETREG_REGISTERED) { - mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); - locks_released = true; + netdev_released = true; err = register_netdevice(adapter->netdev); if (err) { dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", err); /* go back and try again.*/ - mutex_lock(&adapter->crit_lock); + netdev_lock(adapter->netdev); iavf_free_rss(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); - mutex_unlock(&adapter->crit_lock); + netdev_unlock(adapter->netdev); goto out; } } @@ -2090,10 +2088,8 @@ static void iavf_finish_config(struct work_struct *work) } out: - if (!locks_released) { - mutex_unlock(&adapter->crit_lock); + if (!netdev_released) netdev_unlock(adapter->netdev); - } rtnl_unlock(); } @@ -2911,28 +2907,15 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_change_state(adapter, __IAVF_INIT_FAILED); } -/** - * iavf_watchdog_task - Periodic call-back task - * @work: pointer to work_struct - **/ -static void iavf_watchdog_task(struct work_struct *work) +static const int IAVF_NO_RESCHED = -1; + +/* return: msec delay for requeueing itself */ +static int iavf_watchdog_step(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - watchdog_task.work); - struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; u32 reg_val; - netdev_lock(netdev); - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state == __IAVF_REMOVE) { - netdev_unlock(netdev); - return; - } - - goto restart_watchdog; - } 
+ netdev_assert_locked(adapter->netdev); if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); @@ -2940,39 +2923,19 @@ static void iavf_watchdog_task(struct work_struct *work) switch (adapter->state) { case __IAVF_STARTUP: iavf_startup(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(30)); - return; + return 30; case __IAVF_INIT_VERSION_CHECK: iavf_init_version_check(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(30)); - return; + return 30; case __IAVF_INIT_GET_RESOURCES: iavf_init_get_resources(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + return 1; case __IAVF_INIT_EXTENDED_CAPS: iavf_init_process_extended_caps(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + return 1; case __IAVF_INIT_CONFIG_ADAPTER: iavf_init_config_adapter(adapter); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(1)); - return; + return 1; case __IAVF_INIT_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -2980,27 +2943,18 @@ static void iavf_watchdog_task(struct work_struct *work) * watchdog task, iavf_remove should handle this state * as it can loop forever */ - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + return IAVF_NO_RESCHED; } if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { dev_err(&adapter->pdev->dev, "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); - mutex_unlock(&adapter->crit_lock); - 
netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, (5 * HZ)); - return; + return 5000; } /* Try again from failed step*/ iavf_change_state(adapter, adapter->last_state); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ); - return; + return 1000; case __IAVF_COMM_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { @@ -3010,9 +2964,7 @@ static void iavf_watchdog_task(struct work_struct *work) */ iavf_change_state(adapter, __IAVF_INIT_FAILED); adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + return IAVF_NO_RESCHED; } reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; @@ -3030,18 +2982,9 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, - msecs_to_jiffies(10)); - return; + return 10; case __IAVF_RESETTING: - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - HZ * 2); - return; + return 2000; case __IAVF_DOWN: case __IAVF_DOWN_PENDING: case __IAVF_TESTING: @@ -3068,9 +3011,7 @@ static void iavf_watchdog_task(struct work_struct *work) break; case __IAVF_REMOVE: default: - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - return; + return IAVF_NO_RESCHED; } /* check for hw reset */ @@ -3080,24 +3021,29 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); - mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); - queue_delayed_work(adapter->wq, - &adapter->watchdog_task, HZ * 2); - return; } - 
mutex_unlock(&adapter->crit_lock); -restart_watchdog: - netdev_unlock(netdev); + return adapter->aq_required ? 20 : 2000; +} + +static void iavf_watchdog_task(struct work_struct *work) +{ + struct iavf_adapter *adapter = container_of(work, + struct iavf_adapter, + watchdog_task.work); + struct net_device *netdev = adapter->netdev; + int msec_delay; + + netdev_lock(netdev); + msec_delay = iavf_watchdog_step(adapter); + /* note that we schedule a different task */ if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); - if (adapter->aq_required) - queue_delayed_work(adapter->wq, &adapter->watchdog_task, - msecs_to_jiffies(20)); - else + + if (msec_delay != IAVF_NO_RESCHED) queue_delayed_work(adapter->wq, &adapter->watchdog_task, - HZ * 2); + msecs_to_jiffies(msec_delay)); + netdev_unlock(netdev); } /** @@ -3105,14 +3051,15 @@ static void iavf_watchdog_task(struct work_struct *work) * @adapter: board private structure * * Set communication failed flag and free all resources. - * NOTE: This function is expected to be called with crit_lock being held. - **/ + */ static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; struct iavf_vlan_filter *fv, *fvtmp; struct iavf_cloud_filter *cf, *cftmp; + netdev_assert_locked(adapter->netdev); + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; /* We don't use netif_running() because it may be true prior to @@ -3212,17 +3159,7 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - /* When device is being removed it doesn't make sense to run the reset - * task, just return in such a case. 
- */ netdev_lock(netdev); - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state != __IAVF_REMOVE) - queue_work(adapter->wq, &adapter->reset_task); - - netdev_unlock(netdev); - return; - } iavf_misc_irq_disable(adapter); if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { @@ -3267,12 +3204,22 @@ static void iavf_reset_task(struct work_struct *work) dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", reg_val); iavf_disable_vf(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); return; /* Do not attempt to reinit. It's dead, Jim. */ } continue_reset: + /* If we are still early in the state machine, just restart. */ + if (adapter->state <= __IAVF_INIT_FAILED) { + iavf_shutdown_adminq(hw); + iavf_change_state(adapter, __IAVF_STARTUP); + iavf_startup(adapter); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + netdev_unlock(netdev); + return; + } + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. 
@@ -3411,7 +3358,6 @@ static void iavf_reset_task(struct work_struct *work) adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; wake_up(&adapter->reset_waitqueue); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); return; @@ -3422,7 +3368,6 @@ static void iavf_reset_task(struct work_struct *work) } iavf_disable_vf(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); } @@ -3435,6 +3380,7 @@ static void iavf_adminq_task(struct work_struct *work) { struct iavf_adapter *adapter = container_of(work, struct iavf_adapter, adminq_task); + struct net_device *netdev = adapter->netdev; struct iavf_hw *hw = &adapter->hw; struct iavf_arq_event_info event; enum virtchnl_ops v_op; @@ -3442,13 +3388,7 @@ static void iavf_adminq_task(struct work_struct *work) u32 val, oldval; u16 pending; - if (!mutex_trylock(&adapter->crit_lock)) { - if (adapter->state == __IAVF_REMOVE) - return; - - queue_work(adapter->wq, &adapter->adminq_task); - goto out; - } + netdev_lock(netdev); if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) goto unlock; @@ -3515,8 +3455,7 @@ static void iavf_adminq_task(struct work_struct *work) freedom: kfree(event.msg_buf); unlock: - mutex_unlock(&adapter->crit_lock); -out: + netdev_unlock(netdev); /* re-enable Admin queue interrupt cause */ iavf_misc_irq_enable(adapter); } @@ -4209,8 +4148,8 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); - struct iavf_cloud_filter *filter = NULL; - int err = -EINVAL, count = 50; + struct iavf_cloud_filter *filter; + int err; if (tc < 0) { dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); @@ -4220,17 +4159,10 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, filter = kzalloc(sizeof(*filter), GFP_KERNEL); if (!filter) return -ENOMEM; - - while (!mutex_trylock(&adapter->crit_lock)) { - if 
(--count == 0) { - kfree(filter); - return err; - } - udelay(1); - } - filter->cookie = cls_flower->cookie; + netdev_lock(adapter->netdev); + /* bail out here if filter already exists */ spin_lock_bh(&adapter->cloud_filter_list_lock); if (iavf_find_cf(adapter, &cls_flower->cookie)) { @@ -4264,7 +4196,7 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, if (err) kfree(filter); - mutex_unlock(&adapter->crit_lock); + netdev_unlock(adapter->netdev); return err; } @@ -4568,28 +4500,13 @@ static int iavf_open(struct net_device *netdev) return -EIO; } - while (!mutex_trylock(&adapter->crit_lock)) { - /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock - * is already taken and iavf_open is called from an upper - * device's notifier reacting on NETDEV_REGISTER event. - * We have to leave here to avoid dead lock. - */ - if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) - return -EBUSY; - - usleep_range(500, 1000); - } - - if (adapter->state != __IAVF_DOWN) { - err = -EBUSY; - goto err_unlock; - } + if (adapter->state != __IAVF_DOWN) + return -EBUSY; if (adapter->state == __IAVF_RUNNING && !test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) { dev_dbg(&adapter->pdev->dev, "VF is already open.\n"); - err = 0; - goto err_unlock; + return 0; } /* allocate transmit descriptors */ @@ -4608,9 +4525,7 @@ static int iavf_open(struct net_device *netdev) goto err_req_irq; spin_lock_bh(&adapter->mac_vlan_list_lock); - iavf_add_filter(adapter, adapter->hw.mac.addr); - spin_unlock_bh(&adapter->mac_vlan_list_lock); /* Restore filters that were removed with IFF_DOWN */ @@ -4623,8 +4538,6 @@ static int iavf_open(struct net_device *netdev) iavf_irq_enable(adapter, true); - mutex_unlock(&adapter->crit_lock); - return 0; err_req_irq: @@ -4634,8 +4547,6 @@ static int iavf_open(struct net_device *netdev) iavf_free_all_rx_resources(adapter); err_setup_tx: iavf_free_all_tx_resources(adapter); -err_unlock: - mutex_unlock(&adapter->crit_lock); return err; } @@ -4659,12 +4570,8 
@@ static int iavf_close(struct net_device *netdev) netdev_assert_locked(netdev); - mutex_lock(&adapter->crit_lock); - - if (adapter->state <= __IAVF_DOWN_PENDING) { - mutex_unlock(&adapter->crit_lock); + if (adapter->state <= __IAVF_DOWN_PENDING) return 0; - } set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); /* We cannot send IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS before @@ -4695,7 +4602,6 @@ static int iavf_close(struct net_device *netdev) iavf_change_state(adapter, __IAVF_DOWN_PENDING); iavf_free_traffic_irqs(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); /* We explicitly don't free resources here because the hardware is @@ -4714,11 +4620,10 @@ static int iavf_close(struct net_device *netdev) msecs_to_jiffies(500)); if (!status) netdev_warn(netdev, "Device resources not yet released\n"); - netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); + adapter->aq_required |= aq_to_restore; - mutex_unlock(&adapter->crit_lock); + return 0; } @@ -5227,15 +5132,16 @@ iavf_shaper_set(struct net_shaper_binding *binding, struct iavf_adapter *adapter = netdev_priv(binding->netdev); const struct net_shaper_handle *handle = &shaper->handle; struct iavf_ring *tx_ring; - int ret = 0; + int ret; + + netdev_assert_locked(adapter->netdev); - mutex_lock(&adapter->crit_lock); if (handle->id >= adapter->num_active_queues) - goto unlock; + return 0; ret = iavf_verify_shaper(binding, shaper, extack); if (ret) - goto unlock; + return ret; tx_ring = &adapter->tx_rings[handle->id]; @@ -5245,9 +5151,7 @@ iavf_shaper_set(struct net_shaper_binding *binding, adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; -unlock: - mutex_unlock(&adapter->crit_lock); - return ret; + return 0; } static int iavf_shaper_del(struct net_shaper_binding *binding, @@ -5257,9 +5161,10 @@ static int iavf_shaper_del(struct net_shaper_binding *binding, struct iavf_adapter *adapter = netdev_priv(binding->netdev); struct iavf_ring *tx_ring; - mutex_lock(&adapter->crit_lock); + 
netdev_assert_locked(adapter->netdev); + if (handle->id >= adapter->num_active_queues) - goto unlock; + return 0; tx_ring = &adapter->tx_rings[handle->id]; tx_ring->q_shaper.bw_min = 0; @@ -5268,8 +5173,6 @@ static int iavf_shaper_del(struct net_shaper_binding *binding, adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES_BW; -unlock: - mutex_unlock(&adapter->crit_lock); return 0; } @@ -5530,10 +5433,6 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_alloc_qos_cap; } - /* set up the locks for the AQ, do this only once in probe - * and destroy them only once in remove - */ - mutex_init(&adapter->crit_lock); mutex_init(&hw->aq.asq_mutex); mutex_init(&hw->aq.arq_mutex); @@ -5596,22 +5495,24 @@ static int iavf_suspend(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct iavf_adapter *adapter = netdev_priv(netdev); + bool running; netif_device_detach(netdev); + running = netif_running(netdev); + if (running) + rtnl_lock(); netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); - if (netif_running(netdev)) { - rtnl_lock(); + if (running) iavf_down(adapter); - rtnl_unlock(); - } + iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - mutex_unlock(&adapter->crit_lock); netdev_unlock(netdev); + if (running) + rtnl_unlock(); return 0; } @@ -5688,20 +5589,20 @@ static void iavf_remove(struct pci_dev *pdev) * There are flows where register/unregister netdev may race. 
*/ while (1) { - mutex_lock(&adapter->crit_lock); + netdev_lock(netdev); if (adapter->state == __IAVF_RUNNING || adapter->state == __IAVF_DOWN || adapter->state == __IAVF_INIT_FAILED) { - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); break; } /* Simply return if we already went through iavf_shutdown */ if (adapter->state == __IAVF_REMOVE) { - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); return; } - mutex_unlock(&adapter->crit_lock); + netdev_unlock(netdev); usleep_range(500, 1000); } cancel_delayed_work_sync(&adapter->watchdog_task); @@ -5711,7 +5612,6 @@ static void iavf_remove(struct pci_dev *pdev) unregister_netdev(netdev); netdev_lock(netdev); - mutex_lock(&adapter->crit_lock); dev_info(&adapter->pdev->dev, "Removing device\n"); iavf_change_state(adapter, __IAVF_REMOVE); @@ -5727,9 +5627,11 @@ static void iavf_remove(struct pci_dev *pdev) iavf_misc_irq_disable(adapter); /* Shut down all the garbage mashers on the detention level */ + netdev_unlock(netdev); cancel_work_sync(&adapter->reset_task); cancel_delayed_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->adminq_task); + netdev_lock(netdev); adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; @@ -5747,8 +5649,6 @@ static void iavf_remove(struct pci_dev *pdev) /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); mutex_destroy(&hw->aq.asq_mutex); - mutex_unlock(&adapter->crit_lock); - mutex_destroy(&adapter->crit_lock); netdev_unlock(netdev); iounmap(hw->hw_addr); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a6f0e5990be250..07f0d0a0f1e28a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return iavf_status_to_errno(status); received_op = (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + 
+ if (received_op == VIRTCHNL_OP_EVENT) { + struct iavf_adapter *adapter = hw->back; + struct virtchnl_pf_event *vpe = + (struct virtchnl_pf_event *)event->msg_buf; + + if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING) + continue; + + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) + iavf_schedule_reset(adapter, + IAVF_FLAG_RESET_PENDING); + + return -EIO; + } + if (op_to_poll == received_op) break; } diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090ac..66e070095d1bbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. 
*/ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. 
*/ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b384..ac15c0d2bc1a47 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. 
* @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index 69d5b1a28491db..59323c019544fc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2345,15 +2345,15 @@ ice_get_set_tx_topo(struct ice_hw *hw, u8 *buf, u16 buf_size, cmd->set_flags |= ICE_AQC_TX_TOPO_FLAGS_SRC_RAM | ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW; - if (hw->mac_type == ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); } else { ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_tx_topo); cmd->get_flags = ICE_AQC_TX_TOPO_GET_RAM; - } - if (hw->mac_type != ICE_MAC_GENERIC_3K_E825) - desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + if (hw->mac_type == ICE_MAC_E810 || + hw->mac_type == ICE_MAC_GENERIC) + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + } status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd); if (status) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 22371011c24928..2410aee59fb2d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -1321,12 +1321,18 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ if (!primary_lag) { lag->primary = true; + if (!ice_is_switchdev_running(lag->pf)) + return; + /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); 
primary_lag = lag; } else { u16 swid; + if (!ice_is_switchdev_running(primary_lag->pf)) + return; + swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d390157b59fe18..82d472f1d781a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2740,6 +2740,27 @@ void ice_map_xdp_rings(struct ice_vsi *vsi) } } +/** + * ice_unmap_xdp_rings - Unmap XDP rings from interrupt vectors + * @vsi: the VSI with XDP rings being unmapped + */ +static void ice_unmap_xdp_rings(struct ice_vsi *vsi) +{ + int v_idx; + + ice_for_each_q_vector(vsi, v_idx) { + struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) + if (!ring->tx_buf || !ice_ring_is_xdp(ring)) + break; + + /* restore the value of last node prior to XDP setup */ + q_vector->tx.tx_ring = ring; + } +} + /** * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP @@ -2803,7 +2824,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, if (status) { dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n", status); - goto clear_xdp_rings; + goto unmap_xdp_rings; } /* assign the prog only when it's not already present on VSI; @@ -2819,6 +2840,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, ice_vsi_assign_bpf_prog(vsi, prog); return 0; +unmap_xdp_rings: + ice_unmap_xdp_rings(vsi); clear_xdp_rings: ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { @@ -2835,6 +2858,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, mutex_unlock(&pf->avail_q_mutex); devm_kfree(dev, vsi->xdp_rings); + vsi->xdp_rings = NULL; + return -ENOMEM; } @@ -2850,7 +2875,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, 
enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; - int i, v_idx; + int i; /* q_vectors are freed in reset path so there's no point in detaching * rings @@ -2858,17 +2883,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; - ice_for_each_q_vector(vsi, v_idx) { - struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) - if (!ring->tx_buf || !ice_ring_is_xdp(ring)) - break; - - /* restore the value of last node prior to XDP setup */ - q_vector->tx.tx_ring = ring; - } + ice_unmap_xdp_rings(vsi); free_qmap: mutex_lock(&pf->avail_q_mutex); @@ -3013,11 +3028,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_vsi_determine_xdp_res(vsi); if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + goto resume_if; } else { xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, ICE_XDP_CFG_FULL); - if (xdp_ring_err) + if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + goto resume_if; + } } xdp_features_set_redirect_target(vsi->netdev, true); /* reallocate Rx queues that are used for zero-copy */ @@ -3035,6 +3053,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); } +resume_if: if (if_running) ret = ice_up(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1fd1ae03eb9096..11ed48a62b5360 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2307,6 +2307,7 @@ static int ice_capture_crosststamp(ktime_t *device, ts = ((u64)ts_hi << 32) | ts_lo; system->cycles = ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); diff 
--git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 6ca13c5dcb14e7..d9d09296d1d481 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -84,6 +84,27 @@ ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid) return NULL; } +/** + * ice_sched_find_next_vsi_node - find the next node for a given VSI + * @vsi_node: VSI support node to start search with + * + * Return: Next VSI support node, or NULL. + * + * The function returns a pointer to the next node from the VSI layer + * assigned to the given VSI, or NULL if there is no such a node. + */ +static struct ice_sched_node * +ice_sched_find_next_vsi_node(struct ice_sched_node *vsi_node) +{ + unsigned int vsi_handle = vsi_node->vsi_handle; + + while ((vsi_node = vsi_node->sibling) != NULL) + if (vsi_node->vsi_handle == vsi_handle) + break; + + return vsi_node; +} + /** * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd * @hw: pointer to the HW struct @@ -1084,8 +1105,10 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi, if (parent->num_children < max_child_nodes) { new_num_nodes = max_child_nodes - parent->num_children; } else { - /* This parent is full, try the next sibling */ - parent = parent->sibling; + /* This parent is full, + * try the next available sibling. + */ + parent = ice_sched_find_next_vsi_node(parent); /* Don't modify the first node TEID memory if the * first node was added already in the above call. 
* Instead send some temp memory for all other @@ -1528,12 +1551,23 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc, /* get the first queue group node from VSI sub-tree */ qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); while (qgrp_node) { + struct ice_sched_node *next_vsi_node; + /* make sure the qgroup node is part of the VSI subtree */ if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node)) if (qgrp_node->num_children < max_children && qgrp_node->owner == owner) break; qgrp_node = qgrp_node->sibling; + if (qgrp_node) + continue; + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + if (!next_vsi_node) + break; + + vsi_node = next_vsi_node; + qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer); } /* Select the best queue group */ @@ -1604,16 +1638,16 @@ ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node, /** * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes * @hw: pointer to the HW struct - * @num_qs: number of queues + * @num_new_qs: number of new queues that will be added to the tree * @num_nodes: num nodes array * * This function calculates the number of VSI child nodes based on the * number of queues. */ static void -ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes) +ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_new_qs, u16 *num_nodes) { - u16 num = num_qs; + u16 num = num_new_qs; u8 i, qgl, vsil; qgl = ice_sched_get_qgrp_layer(hw); @@ -1779,7 +1813,11 @@ ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle, if (!parent) return -EIO; - if (i == vsil) + /* Do not modify the VSI handle for already existing VSI nodes, + * (if no new VSI node was added to the tree). + * Assign the VSI handle only to newly added VSI nodes. 
+ */ + if (i == vsil && num_added) parent->vsi_handle = vsi_handle; } @@ -1812,6 +1850,41 @@ ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc) num_nodes); } +/** + * ice_sched_recalc_vsi_support_nodes - recalculate VSI support nodes count + * @hw: pointer to the HW struct + * @vsi_node: pointer to the leftmost VSI node that needs to be extended + * @new_numqs: new number of queues that has to be handled by the VSI + * @new_num_nodes: pointer to nodes count table to modify the VSI layer entry + * + * This function recalculates the number of supported nodes that need to + * be added after adding more Tx queues for a given VSI. + * The number of new VSI support nodes that shall be added will be saved + * to the @new_num_nodes table for the VSI layer. + */ +static void +ice_sched_recalc_vsi_support_nodes(struct ice_hw *hw, + struct ice_sched_node *vsi_node, + unsigned int new_numqs, u16 *new_num_nodes) +{ + u32 vsi_nodes_cnt = 1; + u32 max_queue_cnt = 1; + u32 qgl, vsil; + + qgl = ice_sched_get_qgrp_layer(hw); + vsil = ice_sched_get_vsi_layer(hw); + + for (u32 i = vsil; i <= qgl; i++) + max_queue_cnt *= hw->max_children[i]; + + while ((vsi_node = ice_sched_find_next_vsi_node(vsi_node)) != NULL) + vsi_nodes_cnt++; + + if (new_numqs > (max_queue_cnt * vsi_nodes_cnt)) + new_num_nodes[vsil] = DIV_ROUND_UP(new_numqs, max_queue_cnt) - + vsi_nodes_cnt; +} + /** * ice_sched_update_vsi_child_nodes - update VSI child nodes * @pi: port information structure @@ -1863,15 +1936,25 @@ ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle, return status; } - if (new_numqs) - ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes); - /* Keep the max number of queue configuration all the time. Update the - * tree only if number of queues > previous number of queues. 
This may + ice_sched_recalc_vsi_support_nodes(hw, vsi_node, + new_numqs, new_num_nodes); + ice_sched_calc_vsi_child_nodes(hw, new_numqs - prev_numqs, + new_num_nodes); + + /* Never decrease the number of queues in the tree. Update the tree + * only if number of queues > previous number of queues. This may * leave some extra nodes in the tree if number of queues < previous * number but that wouldn't harm anything. Removing those extra nodes * may complicate the code if those nodes are part of SRL or * individually rate limited. + * Also, add the required VSI support nodes if the existing ones cannot + * handle the requested new number of queues. */ + status = ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node, + new_num_nodes); + if (status) + return status; + status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node, new_num_nodes, owner); if (status) @@ -2012,6 +2095,58 @@ static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node) return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF); } +/** + * ice_sched_rm_vsi_subtree - remove all nodes assigned to a given VSI + * @pi: port information structure + * @vsi_node: pointer to the leftmost node of the VSI to be removed + * @owner: LAN or RDMA + * @tc: TC number + * + * Return: Zero in case of success, or -EBUSY if the VSI has leaf nodes in TC. + * + * This function removes all the VSI support nodes associated with a given VSI + * and its LAN or RDMA children nodes from the scheduler tree. 
+ */ +static int +ice_sched_rm_vsi_subtree(struct ice_port_info *pi, + struct ice_sched_node *vsi_node, u8 owner, u8 tc) +{ + u16 vsi_handle = vsi_node->vsi_handle; + bool all_vsi_nodes_removed = true; + int j = 0; + + while (vsi_node) { + struct ice_sched_node *next_vsi_node; + + if (ice_sched_is_leaf_node_present(vsi_node)) { + ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", tc); + return -EBUSY; + } + while (j < vsi_node->num_children) { + if (vsi_node->children[j]->owner == owner) + ice_free_sched_node(pi, vsi_node->children[j]); + else + j++; + } + + next_vsi_node = ice_sched_find_next_vsi_node(vsi_node); + + /* remove the VSI if it has no children */ + if (!vsi_node->num_children) + ice_free_sched_node(pi, vsi_node); + else + all_vsi_nodes_removed = false; + + vsi_node = next_vsi_node; + } + + /* clean up aggregator related VSI info if any */ + if (all_vsi_nodes_removed) + ice_sched_rm_agg_vsi_info(pi, vsi_handle); + + return 0; +} + /** * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes * @pi: port information structure @@ -2038,7 +2173,6 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) ice_for_each_traffic_class(i) { struct ice_sched_node *vsi_node, *tc_node; - u8 j = 0; tc_node = ice_sched_get_tc_node(pi, i); if (!tc_node) @@ -2048,31 +2182,12 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner) if (!vsi_node) continue; - if (ice_sched_is_leaf_node_present(vsi_node)) { - ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i); - status = -EBUSY; + status = ice_sched_rm_vsi_subtree(pi, vsi_node, owner, i); + if (status) goto exit_sched_rm_vsi_cfg; - } - while (j < vsi_node->num_children) { - if (vsi_node->children[j]->owner == owner) { - ice_free_sched_node(pi, vsi_node->children[j]); - /* reset the counter again since the num - * children will be updated after node removal - */ - j = 0; - } else { - j++; - } - } - /* remove the VSI if it has no children */ - if 
(!vsi_node->num_children) { - ice_free_sched_node(pi, vsi_node); - vsi_ctx->sched.vsi_node[i] = NULL; + vsi_ctx->sched.vsi_node[i] = NULL; - /* clean up aggregator related VSI info if any */ - ice_sched_rm_agg_vsi_info(pi, vsi_handle); - } if (owner == ICE_SCHED_NODE_OWNER_LAN) vsi_ctx->sched.max_lanq[i] = 0; else diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 7c3006eb68dd07..6446d0fcc05286 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -4275,7 +4275,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) } ice_vfhw_mac_add(vf, &al->list[i]); - vf->num_mac++; break; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 7752920d7a8ee4..1cca9b2262e866 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -2097,6 +2097,11 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); vf_vsi = ice_get_vf_vsi(vf); + if (!vf_vsi) { + dev_err(dev, "Can not get FDIR vf_vsi for VF %u\n", vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err_exit; + } #define ICE_VF_MAX_FDIR_FILTERS 128 if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 66544faab710aa..70dbf80f3bb75b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -143,6 +143,7 @@ enum idpf_vport_state { * @vport_id: Vport identifier * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index + * @max_tx_hdr_size: Max header length hardware can support * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -153,6 +154,7 @@ struct idpf_netdev_priv { u32 vport_id; u32 link_speed_mbps; u16 
vport_idx; + u16 max_tx_hdr_size; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; @@ -629,13 +631,13 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) -#define IDPF_CAP_RX_CSUM_L4V4 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) +#define IDPF_CAP_TX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP) -#define IDPF_CAP_RX_CSUM_L4V6 (\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) +#define IDPF_CAP_TX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP) #define IDPF_CAP_RX_CSUM (\ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ @@ -644,11 +646,9 @@ bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) -#define IDPF_CAP_SCTP_CSUM (\ +#define IDPF_CAP_TX_SCTP_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ - VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP) #define IDPF_CAP_TUNNEL_TX_CSUM (\ VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index aa755dedb41d98..2ed801398971cc 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -703,8 +703,10 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; struct idpf_vport_config *vport_config; + netdev_features_t other_offloads = 0; + netdev_features_t csum_offloads = 0; + netdev_features_t tso_offloads = 0; netdev_features_t dflt_features; - netdev_features_t offloads = 0; struct idpf_netdev_priv *np; struct net_device *netdev; u16 idx = vport->idx; @@ -721,6 +723,7 @@ static int idpf_cfg_netdev(struct 
idpf_vport *vport) np->vport = vport; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); vport->netdev = netdev; return idpf_init_mac_addr(vport, netdev); @@ -738,6 +741,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->adapter = adapter; np->vport_idx = vport->idx; np->vport_id = vport->vport_id; + np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); spin_lock_init(&np->stats_lock); @@ -766,53 +770,32 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) dflt_features |= NETIF_F_RXHASH; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) - dflt_features |= NETIF_F_IP_CSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) - dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V4)) + csum_offloads |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_CSUM_L4V6)) + csum_offloads |= NETIF_F_IPV6_CSUM; if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) - dflt_features |= NETIF_F_RXCSUM; - if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) - dflt_features |= NETIF_F_SCTP_CRC; + csum_offloads |= NETIF_F_RXCSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_TX_SCTP_CSUM)) + csum_offloads |= NETIF_F_SCTP_CRC; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) - dflt_features |= NETIF_F_TSO; + tso_offloads |= NETIF_F_TSO; if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) - dflt_features |= NETIF_F_TSO6; + tso_offloads |= NETIF_F_TSO6; if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_UDP | VIRTCHNL2_CAP_SEG_IPV6_UDP)) - dflt_features |= NETIF_F_GSO_UDP_L4; + tso_offloads |= NETIF_F_GSO_UDP_L4; if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) - offloads |= NETIF_F_GRO_HW; - /* 
advertise to stack only if offloads for encapsulated packets is - * supported - */ - if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, - VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { - offloads |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM | - NETIF_F_GSO_PARTIAL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_IPXIP6 | - 0; - - if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, - IDPF_CAP_TUNNEL_TX_CSUM)) - netdev->gso_partial_features |= - NETIF_F_GSO_UDP_TUNNEL_CSUM; - - netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; - offloads |= NETIF_F_TSO_MANGLEID; - } + other_offloads |= NETIF_F_GRO_HW; if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) - offloads |= NETIF_F_LOOPBACK; + other_offloads |= NETIF_F_LOOPBACK; - netdev->features |= dflt_features; - netdev->hw_features |= dflt_features | offloads; - netdev->hw_enc_features |= dflt_features | offloads; + netdev->features |= dflt_features | csum_offloads | tso_offloads; + netdev->hw_features |= netdev->features | other_offloads; + netdev->vlan_features |= netdev->features | other_offloads; + netdev->hw_enc_features |= dflt_features | other_offloads; idpf_set_ethtool_ops(netdev); netif_set_affinity_auto(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); @@ -1132,11 +1115,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, num_max_q = max(max_q->max_txq, max_q->max_rxq); vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); - if (!vport->q_vector_idxs) { - kfree(vport); + if (!vport->q_vector_idxs) + goto free_vport; - return NULL; - } idpf_vport_init(vport, max_q); /* This alloc is done separate from the LUT because it's not strictly @@ -1146,11 +1127,9 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, */ rss_data = &adapter->vport_config[idx]->user_config.rss_data; rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); - if (!rss_data->rss_key) { - kfree(vport); + 
if (!rss_data->rss_key) + goto free_vector_idxs; - return NULL; - } /* Initialize default rss key */ netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); @@ -1163,6 +1142,13 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, adapter->next_vport = idpf_get_free_slot(adapter); return vport; + +free_vector_idxs: + kfree(vport->q_vector_idxs); +free_vport: + kfree(vport); + + return NULL; } /** @@ -1830,11 +1816,19 @@ void idpf_vc_event_task(struct work_struct *work) if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) return; - if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || - test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { - set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); - idpf_init_hard_reset(adapter); - } + if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags)) + goto func_reset; + + if (test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) + goto drv_load; + + return; + +func_reset: + idpf_vc_xn_shutdown(adapter->vcxn_mngr); +drv_load: + set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + idpf_init_hard_reset(adapter); } /** @@ -2219,8 +2213,8 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); - struct idpf_adapter *adapter = vport->adapter; + struct idpf_netdev_priv *np = netdev_priv(netdev); + u16 max_tx_hdr_size = np->max_tx_hdr_size; size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -2243,7 +2237,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, goto unsupported; len = skb_network_header_len(skb); - if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; if (!skb->encapsulation) @@ -2256,7 +2250,7 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, /* IPLEN can support at most 127 dwords */ len = skb_inner_network_header_len(skb); - if (unlikely(len > 
idpf_get_max_tx_hdr_size(adapter))) + if (unlikely(len > max_tx_hdr_size)) goto unsupported; /* No need to validate L4LEN as TCP is the only protocol with a diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index bec4a02c53733e..b35713036a54ab 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -89,6 +89,7 @@ static void idpf_shutdown(struct pci_dev *pdev) { struct idpf_adapter *adapter = pci_get_drvdata(pdev); + cancel_delayed_work_sync(&adapter->serv_task); cancel_delayed_work_sync(&adapter->vc_event_task); idpf_vc_core_deinit(adapter); idpf_deinit_dflt_mbx(adapter); diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index eae1b6f474e624..6ade54e213259c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + int csum, tso, needed; unsigned int count; __be16 protocol; - int csum, tso; count = idpf_tx_desc_count_required(tx_q, skb); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); - if (idpf_tx_maybe_stop_common(tx_q, - count + IDPF_TX_DESCS_PER_CACHE_LINE + - IDPF_TX_DESCS_FOR_CTX)) { + needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX; + if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + IDPF_DESC_UNUSED(tx_q), + needed, needed)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); u64_stats_update_begin(&tx_q->stats_sync); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index bdf52cef3891b8..aa16e4c1edbb8b 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -2132,6 +2132,19 @@ void idpf_tx_splitq_build_flow_desc(union 
idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } +/* Global conditions to tell whether the txq (and related resources) + * has room to allow the use of "size" descriptors. + */ +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +{ + if (IDPF_DESC_UNUSED(tx_q) < size || + IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || + IDPF_TX_BUF_RSV_LOW(tx_q)) + return 0; + return 1; +} + /** * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked @@ -2142,29 +2155,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, unsigned int descs_needed) { - if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) - goto out; - - /* If there are too many outstanding completions expected on the - * completion queue, stop the TX queue to give the device some time to - * catch up - */ - if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > - IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) - goto splitq_stop; - - /* Also check for available book keeping buffers; if we are low, stop - * the queue to wait for more completions - */ - if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) - goto splitq_stop; - - return 0; - -splitq_stop: - netif_stop_subqueue(tx_q->netdev, tx_q->idx); + if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, + idpf_txq_has_room(tx_q, descs_needed), + 1, 1)) + return 0; -out: u64_stats_update_begin(&tx_q->stats_sync); u64_stats_inc(&tx_q->q_stats.q_busy); u64_stats_update_end(&tx_q->stats_sync); @@ -2190,12 +2185,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); tx_q->next_to_use = val; - if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) { - u64_stats_update_begin(&tx_q->stats_sync); - u64_stats_inc(&tx_q->q_stats.q_busy); - 
u64_stats_update_end(&tx_q->stats_sync); - } - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -4025,6 +4014,14 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that. + */ + if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, + q_vector->tx[0]))) + return budget; + work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might @@ -4035,15 +4032,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) else idpf_vport_intr_set_wb_on_itr(q_vector); - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - else - return work_done; + return work_done; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index b029f566e57cd6..c192a6c547dd32 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1037,12 +1037,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); -static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, - u32 needed) -{ - return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - IDPF_DESC_UNUSED(tx_q), - needed, needed); -} - #endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 3d2413b8684fca..5d2ca007f6828e 100644 --- 
a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -376,7 +376,7 @@ static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) * All waiting threads will be woken-up and their transaction aborted. Further * operations on that object will fail. */ -static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) { int i; diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index 83da5d8da56bf2..23271cf0a21605 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -66,5 +66,6 @@ int idpf_send_get_stats_msg(struct idpf_vport *vport); int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); +void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr); #endif /* _IDPF_VIRTCHNL_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c35cc5cb118569..2f265c0959c7a0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -319,6 +319,7 @@ struct igc_adapter { struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ struct system_time_snapshot snapshot; + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ char fw_version[32]; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8e449904aa7dbd..d19325b0e6e0ba 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -574,7 +574,10 @@ #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) 
-#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */ +/* A short cycle time of 1us theoretically should work, but appears to be too + * short in practice. + */ +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ @@ -593,6 +596,7 @@ #define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ #define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ #define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ +#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ /* PCIe PTM Cycle Control */ #define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f1330379e6bbc5..b1669d7cf43591 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7231,6 +7231,7 @@ static int igc_probe(struct pci_dev *pdev, err_register: igc_release_hw_control(adapter); + igc_ptp_stop(adapter); err_eeprom: if (!igc_check_reset_block(hw)) igc_reset_phy(hw); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 946edbad43022c..efc7b30e421133 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,45 +974,62 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ +static void igc_ptm_trigger(struct igc_hw *hw) +{ + u32 ctrl; + + /* To "manually" start the PTM cycle we need to set the + * trigger (TRIG) bit + */ + ctrl = rd32(IGC_PTM_CTRL); + ctrl |= IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Perform flush after write to CTRL register otherwise + * 
transaction may not start + */ + wrfl(); +} + +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ +static void igc_ptm_reset(struct igc_hw *hw) +{ + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Write to clear all status */ + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); +} + static int igc_phc_get_syncdevicetime(ktime_t *device, struct system_counterval_t *system, void *ctx) { - u32 stat, t2_curr_h, t2_curr_l, ctrl; struct igc_adapter *adapter = ctx; struct igc_hw *hw = &adapter->hw; + u32 stat, t2_curr_h, t2_curr_l; int err, count = 100; ktime_t t1, t2_curr; - /* Get a snapshot of system clocks to use as historic value. */ - ktime_get_snapshot(&adapter->snapshot); - + /* Doing this in a loop because in the event of a + * badly timed (ha!) system clock adjustment, we may + * get PTM errors from the PCI root, but these errors + * are transitory. Repeating the process returns valid + * data eventually. + */ do { - /* Doing this in a loop because in the event of a - * badly timed (ha!) system clock adjustment, we may - * get PTM errors from the PCI root, but these errors - * are transitory. Repeating the process returns valid - * data eventually. - */ + /* Get a snapshot of system clocks to use as historic value. */ + ktime_get_snapshot(&adapter->snapshot); - /* To "manually" start the PTM cycle we need to clear and - * then set again the TRIG bit. - */ - ctrl = rd32(IGC_PTM_CTRL); - ctrl &= ~IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - ctrl |= IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - - /* The cycle only starts "for real" when software notifies - * that it has read the registers, this is done by setting - * VALID bit. 
- */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, stat, IGC_PTM_STAT_SLEEP, IGC_PTM_STAT_TIMEOUT); + igc_ptm_reset(hw); + if (err < 0) { netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); return err; @@ -1021,15 +1038,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) break; - if (stat & ~IGC_PTM_STAT_VALID) { - /* An error occurred, log it. */ - igc_ptm_log_error(adapter, stat); - /* The STAT register is write-1-to-clear (W1C), - * so write the previous error status to clear it. - */ - wr32(IGC_PTM_STAT, stat); - continue; - } + igc_ptm_log_error(adapter, stat); } while (--count); if (!count) { @@ -1061,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, { struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, ptp_caps); + int ret; + + /* This blocks until any in progress PTM transactions complete */ + mutex_lock(&adapter->ptm_lock); + + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, + adapter, &adapter->snapshot, cts); + mutex_unlock(&adapter->ptm_lock); - return get_device_system_crosststamp(igc_phc_get_syncdevicetime, - adapter, &adapter->snapshot, cts); + return ret; } static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, @@ -1162,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); + mutex_init(&adapter->ptm_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1174,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; netdev_err(netdev, "ptp_clock_register failed\n"); + mutex_destroy(&adapter->ptm_lock); } else if (adapter->ptp_clock) { netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= 
IGC_PTP_ENABLED; @@ -1203,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl; + mutex_lock(&adapter->ptm_lock); ctrl = rd32(IGC_PTM_CTRL); ctrl &= ~IGC_PTM_CTRL_EN; wr32(IGC_PTM_CTRL, ctrl); + mutex_unlock(&adapter->ptm_lock); } /** @@ -1237,13 +1257,18 @@ void igc_ptp_suspend(struct igc_adapter *adapter) **/ void igc_ptp_stop(struct igc_adapter *adapter) { + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + igc_ptp_suspend(adapter); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } + mutex_destroy(&adapter->ptm_lock); } /** @@ -1255,13 +1280,18 @@ void igc_ptp_stop(struct igc_adapter *adapter) void igc_ptp_reset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - u32 cycle_ctrl, ctrl; + u32 cycle_ctrl, ctrl, stat; unsigned long flags; u32 timadj; + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + mutex_lock(&adapter->ptm_lock); + spin_lock_irqsave(&adapter->tmreg_lock, flags); switch (adapter->hw.mac.type) { @@ -1290,14 +1320,19 @@ void igc_ptp_reset(struct igc_adapter *adapter) ctrl = IGC_PTM_CTRL_EN | IGC_PTM_CTRL_START_NOW | IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | - IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) | - IGC_PTM_CTRL_TRIG; + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); wr32(IGC_PTM_CTRL, ctrl); /* Force the first cycle to run. */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT)) + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + + igc_ptm_reset(hw); break; default: /* No work to do. 
*/ @@ -1314,5 +1349,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptm_lock); + wrfl(); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 07ea1954a276ed..796e90d741f022 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -9,7 +9,7 @@ #define IXGBE_IPSEC_KEY_BITS 160 static const char aes_gcm_name[] = "rfc4106(gcm(aes))"; -static void ixgbe_ipsec_del_sa(struct xfrm_state *xs); +static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs); /** * ixgbe_ipsec_set_tx_sa - set the Tx SA registers @@ -321,7 +321,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) if (r->used) { if (r->mode & IXGBE_RXTXMOD_VF) - ixgbe_ipsec_del_sa(r->xs); + ixgbe_ipsec_del_sa(adapter->netdev, r->xs); else ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi, r->key, r->salt, @@ -330,7 +330,7 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) if (t->used) { if (t->mode & IXGBE_RXTXMOD_VF) - ixgbe_ipsec_del_sa(t->xs); + ixgbe_ipsec_del_sa(adapter->netdev, t->xs); else ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt); } @@ -417,6 +417,7 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec, /** * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol + * @dev: pointer to net device * @xs: pointer to xfrm_state struct * @mykey: pointer to key array to populate * @mysalt: pointer to salt value to populate @@ -424,10 +425,10 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec, * This copies the protocol keys and salt to our own data tables. The * 82599 family only supports the one algorithm. 
**/ -static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int ixgbe_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -473,11 +474,12 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs, /** * ixgbe_ipsec_check_mgmt_ip - make sure there is no clash with mgmt IP filters + * @dev: pointer to net device * @xs: pointer to transformer state struct **/ -static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) +static int ixgbe_ipsec_check_mgmt_ip(struct net_device *dev, + struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; u32 mfval, manc, reg; @@ -556,13 +558,14 @@ static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs) /** * ixgbe_ipsec_add_sa - program device with a security association + * @dev: pointer to device to program * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, +static int ixgbe_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; @@ -581,7 +584,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, return -EINVAL; } - if (ixgbe_ipsec_check_mgmt_ip(xs)) { + if (ixgbe_ipsec_check_mgmt_ip(dev, xs)) { NL_SET_ERR_MSG_MOD(extack, "IPsec IP addr clash with mgmt filters"); return -EINVAL; } @@ -615,7 +618,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, rsa.decrypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt); + ret = ixgbe_ipsec_parse_proto_keys(dev, xs, rsa.key, &rsa.salt); if 
(ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table"); return ret; @@ -724,7 +727,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, if (xs->id.proto & IPPROTO_ESP) tsa.encrypt = xs->ealg || xs->aead; - ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt); + ret = ixgbe_ipsec_parse_proto_keys(dev, xs, tsa.key, &tsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table"); memset(&tsa, 0, sizeof(tsa)); @@ -752,11 +755,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs, /** * ixgbe_ipsec_del_sa - clear out this specific SA + * @dev: pointer to device to program * @xs: pointer to transformer state struct **/ -static void ixgbe_ipsec_del_sa(struct xfrm_state *xs) +static void ixgbe_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ipsec *ipsec = adapter->ipsec; struct ixgbe_hw *hw = &adapter->hw; @@ -841,7 +844,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) continue; if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF && ipsec->rx_tbl[i].vf == vf) - ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs); + ixgbe_ipsec_del_sa(adapter->netdev, + ipsec->rx_tbl[i].xs); } /* search tx sa table */ @@ -850,7 +854,8 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) continue; if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF && ipsec->tx_tbl[i].vf == vf) - ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs); + ixgbe_ipsec_del_sa(adapter->netdev, + ipsec->tx_tbl[i].xs); } } @@ -930,7 +935,7 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name)); /* set up the HW offload */ - err = ixgbe_ipsec_add_sa(xs, NULL); + err = ixgbe_ipsec_add_sa(adapter->netdev, xs, NULL); if (err) goto err_aead; @@ -1034,7 +1039,7 @@ int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) xs = 
ipsec->tx_tbl[sa_idx].xs; } - ixgbe_ipsec_del_sa(xs); + ixgbe_ipsec_del_sa(adapter->netdev, xs); /* remove the xs that was made-up in the add request */ kfree_sensitive(xs); diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 8ba037e3d9c270..65580b9cb06f21 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -201,6 +201,7 @@ struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec, /** * ixgbevf_ipsec_parse_proto_keys - find the key and salt based on the protocol + * @dev: pointer to net device to program * @xs: pointer to xfrm_state struct * @mykey: pointer to key array to populate * @mysalt: pointer to salt value to populate @@ -208,10 +209,10 @@ struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec, * This copies the protocol keys and salt to our own data tables. The * 82599 family only supports the one algorithm. **/ -static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int ixgbevf_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -256,13 +257,14 @@ static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs, /** * ixgbevf_ipsec_add_sa - program device with a security association + * @dev: pointer to net device to program * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason **/ -static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, +static int ixgbevf_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { - struct net_device *dev = xs->xso.real_dev; struct ixgbevf_adapter *adapter; struct ixgbevf_ipsec *ipsec; u16 sa_idx; @@ -310,7 +312,8 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, rsa.decrypt = xs->ealg || xs->aead; /* get the key and salt */ - 
ret = ixgbevf_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt); + ret = ixgbevf_ipsec_parse_proto_keys(dev, xs, rsa.key, + &rsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table"); return ret; @@ -363,7 +366,8 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, if (xs->id.proto & IPPROTO_ESP) tsa.encrypt = xs->ealg || xs->aead; - ret = ixgbevf_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt); + ret = ixgbevf_ipsec_parse_proto_keys(dev, xs, tsa.key, + &tsa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table"); memset(&tsa, 0, sizeof(tsa)); @@ -388,11 +392,12 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs, /** * ixgbevf_ipsec_del_sa - clear out this specific SA + * @dev: pointer to net device to program * @xs: pointer to transformer state struct **/ -static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs) +static void ixgbevf_ipsec_del_sa(struct net_device *dev, + struct xfrm_state *xs) { - struct net_device *dev = xs->xso.real_dev; struct ixgbevf_adapter *adapter; struct ixgbevf_ipsec *ipsec; u16 sa_idx; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 0a679e95196fed..24499bb36c0057 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1223,7 +1223,7 @@ static void octep_hb_timeout_task(struct work_struct *work) miss_cnt); rtnl_lock(); if (netif_running(oct->netdev)) - octep_stop(oct->netdev); + dev_close(oct->netdev); rtnl_unlock(); } diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c index 18c922dd5fc64d..ccb69bc5c95292 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -835,7 +835,9 @@ static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct 
octep_vf_device *oct = netdev_priv(netdev); netdev_hold(netdev, NULL, GFP_ATOMIC); - schedule_work(&oct->tx_timeout_task); + if (!schedule_work(&oct->tx_timeout_task)) + netdev_put(netdev, NULL); + } static int octep_vf_set_mac(struct net_device *netdev, void *p) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 0b27a695008bdb..971993586fb49d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -717,6 +717,11 @@ int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; + + /* pass lmac as 0 for CGX_CMR_RX_STAT9-12 */ + if (idx >= CGX_RX_STAT_GLOBAL_INDEX) + lmac_id = 0; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index 655dd4726d36ef..0277d226293e9c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -143,6 +143,8 @@ static int mcs_notify_pfvf(struct mcs_intr_event *event, struct rvu *rvu) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); + mutex_unlock(&rvu->mbox_lock); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 992fa0b82e8d2d..ebb56eb0d18cfd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -272,6 +272,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pfid); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pfid); + mutex_unlock(&rvu->mbox_lock); } while (pfmap); } diff --git 
a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c index 7fa98aeb3663c0..4a3370a40dd887 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c @@ -13,19 +13,26 @@ /* RVU LMTST */ #define LMT_TBL_OP_READ 0 #define LMT_TBL_OP_WRITE 1 -#define LMT_MAP_TABLE_SIZE (128 * 1024) #define LMT_MAPTBL_ENTRY_SIZE 16 +#define LMT_MAX_VFS 256 + +#define LMT_MAP_ENTRY_ENA BIT_ULL(20) +#define LMT_MAP_ENTRY_LINES GENMASK_ULL(18, 16) /* Function to perform operations (read/write) on lmtst map table */ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, int lmt_tbl_op) { void __iomem *lmt_map_base; - u64 tbl_base; + u64 tbl_base, cfg; + int pfs, vfs; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + cfg = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + vfs = 1 << (cfg & 0xF); + pfs = 1 << ((cfg >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, LMT_MAP_TABLE_SIZE); + lmt_map_base = ioremap_wc(tbl_base, pfs * vfs * LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); return -ENOMEM; @@ -35,6 +42,13 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, *val = readq(lmt_map_base + index); } else { writeq((*val), (lmt_map_base + index)); + + cfg = FIELD_PREP(LMT_MAP_ENTRY_ENA, 0x1); + /* 2048 LMTLINES */ + cfg |= FIELD_PREP(LMT_MAP_ENTRY_LINES, 0x6); + + writeq(cfg, (lmt_map_base + (index + 8))); + /* Flushing the AP interceptor cache to make APR_LMT_MAP_ENTRY_S * changes effective. Write 1 for flush and read is being used as a * barrier and sets up a data dependency. 
Write to 0 after a write @@ -52,7 +66,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val, #define LMT_MAP_TBL_W1_OFF 8 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc) { - return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) + + return ((rvu_get_pf(pcifunc) * LMT_MAX_VFS) + (pcifunc & RVU_PFVF_FUNC_MASK)) * LMT_MAPTBL_ENTRY_SIZE; } @@ -69,7 +83,7 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc, mutex_lock(&rvu->rsrc_lock); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_ADDR_REQ, iova); - pf = rvu_get_pf(pcifunc) & 0x1F; + pf = rvu_get_pf(pcifunc) & RVU_PFVF_PF_MASK; val = BIT_ULL(63) | BIT_ULL(14) | BIT_ULL(13) | pf << 8 | ((pcifunc & RVU_PFVF_FUNC_MASK) & 0xFF); rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TXN_REQ, val); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1f9ec03c2ce69..c827da62647126 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -553,6 +553,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, u64 lmt_addr, val, tbl_base; int pf, vf, num_vfs, hw_vfs; void __iomem *lmt_map_base; + int apr_pfs, apr_vfs; int buf_size = 10240; size_t off = 0; int index = 0; @@ -568,8 +569,12 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, return -ENOMEM; tbl_base = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_MAP_BASE); + val = rvu_read64(rvu, BLKADDR_APR, APR_AF_LMT_CFG); + apr_vfs = 1 << (val & 0xF); + apr_pfs = 1 << ((val >> 4) & 0x7); - lmt_map_base = ioremap_wc(tbl_base, 128 * 1024); + lmt_map_base = ioremap_wc(tbl_base, apr_pfs * apr_vfs * + LMT_MAPTBL_ENTRY_SIZE); if (!lmt_map_base) { dev_err(rvu->dev, "Failed to setup lmt map table mapping!!\n"); kfree(buf); @@ -591,7 +596,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d \t\t\t", pf); - index = 
pf * rvu->hw->total_vfs * LMT_MAPTBL_ENTRY_SIZE; + index = pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE; off += scnprintf(&buf[off], buf_size - 1 - off, " 0x%llx\t\t", (tbl_base + index)); lmt_addr = readq(lmt_map_base + index); @@ -604,7 +609,7 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp, /* Reading num of VFs per PF */ rvu_get_pf_numvfs(rvu, pf, &num_vfs, &hw_vfs); for (vf = 0; vf < num_vfs; vf++) { - index = (pf * rvu->hw->total_vfs * 16) + + index = (pf * apr_vfs * LMT_MAPTBL_ENTRY_SIZE) + ((vf + 1) * LMT_MAPTBL_ENTRY_SIZE); off += scnprintf(&buf[off], buf_size - 1 - off, "PF%d:VF%d \t\t", pf, vf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c index 052ae5923e3a85..32953cca108c80 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c @@ -60,6 +60,8 @@ static int rvu_rep_up_notify(struct rvu *rvu, struct rep_event *event) otx2_mbox_msg_send_up(&rvu->afpf_wq_info.mbox_up, pf); + otx2_mbox_wait_for_rsp(&rvu->afpf_wq_info.mbox_up, pf); + mutex_unlock(&rvu->mbox_lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index fc59e50bafce66..a6500e3673f248 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -663,10 +663,10 @@ static int cn10k_ipsec_inb_add_state(struct xfrm_state *x, return -EOPNOTSUPP; } -static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, +static int cn10k_ipsec_outb_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xso.dev; struct cn10k_tx_sa_s *sa_entry; struct qmem *sa_info; struct otx2_nic *pf; @@ -676,7 +676,7 @@ static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, if (err) return err; - pf = netdev_priv(netdev); + pf = 
netdev_priv(dev); err = qmem_alloc(pf->dev, &sa_info, pf->ipsec.sa_size, OTX2_ALIGN); if (err) @@ -700,18 +700,18 @@ static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, return 0; } -static int cn10k_ipsec_add_state(struct xfrm_state *x, +static int cn10k_ipsec_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) return cn10k_ipsec_inb_add_state(x, extack); else - return cn10k_ipsec_outb_add_state(x, extack); + return cn10k_ipsec_outb_add_state(dev, x, extack); } -static void cn10k_ipsec_del_state(struct xfrm_state *x) +static void cn10k_ipsec_del_state(struct net_device *dev, struct xfrm_state *x) { - struct net_device *netdev = x->xso.dev; struct cn10k_tx_sa_s *sa_entry; struct qmem *sa_info; struct otx2_nic *pf; @@ -720,7 +720,7 @@ static void cn10k_ipsec_del_state(struct xfrm_state *x) if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) return; - pf = netdev_priv(netdev); + pf = netdev_priv(dev); sa_info = (struct qmem *)x->xso.offload_handle; sa_entry = (struct cn10k_tx_sa_s *)sa_info->base; @@ -732,7 +732,7 @@ static void cn10k_ipsec_del_state(struct xfrm_state *x) err = cn10k_outb_write_sa(pf, sa_info); if (err) - netdev_err(netdev, "Error (%d) deleting SA\n", err); + netdev_err(dev, "Error (%d) deleting SA\n", err); x->xso.offload_handle = 0; qmem_free(pf->dev, sa_info); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index f3b9daffaec3c2..4c7e0f345cb5ba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -531,7 +531,8 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, if (sw_tx_sc->encrypt) sectag_tci |= (MCS_TCI_E | MCS_TCI_C); - policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, secy->netdev->mtu); + policy = FIELD_PREP(MCS_TX_SECY_PLCY_MTU, + pfvf->netdev->mtu + OTX2_ETH_HLEN); /* Write SecTag excluding AN 
bits(1..0) */ policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_TCI, sectag_tci >> 2); policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 1e88422825be7a..d6b4b74e4002b1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -356,6 +356,7 @@ struct otx2_flow_config { struct list_head flow_list_tc; u8 ucast_flt_cnt; bool ntuple; + u16 ntuple_cnt; }; struct dev_hw_ops { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 33ec9a7f7c0339..e13ae5484c19cb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -41,6 +41,7 @@ static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id, if (!pfvf->flow_cfg) return 0; + pfvf->flow_cfg->ntuple_cnt = ctx->val.vu16; otx2_alloc_mcam_entries(pfvf, ctx->val.vu16); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 010385b2998869..45b8c9230184d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -315,7 +315,7 @@ static void otx2_get_pauseparam(struct net_device *netdev, struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_pause_frm_cfg *req, *rsp; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return; mutex_lock(&pfvf->mbox.lock); @@ -347,7 +347,7 @@ static int otx2_set_pauseparam(struct net_device *netdev, if (pause->autoneg) return -EOPNOTSUPP; - if (is_otx2_lbkvf(pfvf->pdev)) + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return -EOPNOTSUPP; if (pause->rx_pause) @@ -941,8 +941,8 @@ static u32 
otx2_get_link(struct net_device *netdev) { struct otx2_nic *pfvf = netdev_priv(netdev); - /* LBK link is internal and always UP */ - if (is_otx2_lbkvf(pfvf->pdev)) + /* LBK and SDP links are internal and always UP */ + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) return 1; return pfvf->linfo.link_up; } @@ -1413,7 +1413,7 @@ static int otx2vf_get_link_ksettings(struct net_device *netdev, { struct otx2_nic *pfvf = netdev_priv(netdev); - if (is_otx2_lbkvf(pfvf->pdev)) { + if (is_otx2_lbkvf(pfvf->pdev) || is_otx2_sdp_rep(pfvf->pdev)) { cmd->base.duplex = DUPLEX_FULL; cmd->base.speed = SPEED_100000; } else { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 47bfd1fb37d4bc..64c6d9162ef644 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -247,7 +247,7 @@ int otx2_mcam_entry_init(struct otx2_nic *pfvf) mutex_unlock(&pfvf->mbox.lock); /* Allocate entries for Ntuple filters */ - count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT); + count = otx2_alloc_mcam_entries(pfvf, flow_cfg->ntuple_cnt); if (count <= 0) { otx2_clear_ntuple_flow_info(pfvf, flow_cfg); return 0; @@ -307,6 +307,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) INIT_LIST_HEAD(&pf->flow_cfg->flow_list_tc); pf->flow_cfg->ucast_flt_cnt = OTX2_DEFAULT_UNICAST_FLOWS; + pf->flow_cfg->ntuple_cnt = OTX2_DEFAULT_FLOWCOUNT; /* Allocate bare minimum number of MCAM entries needed for * unicast and ntuple filters. 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 7ef3ba477d4964..9b28be4c4a5d6c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -729,9 +729,12 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } #ifdef CONFIG_DCB - err = otx2_dcbnl_set_ops(netdev); - if (err) - goto err_free_zc_bmap; + /* Priority flow control is not supported for LBK and SDP vf(s) */ + if (!(is_otx2_lbkvf(vf->pdev) || is_otx2_sdp_rep(vf->pdev))) { + err = otx2_dcbnl_set_ops(netdev); + if (err) + goto err_free_zc_bmap; + } #endif otx2_qos_init(vf, qos_txqs); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c index 0f844c14485a0e..5765bac119f0e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -165,6 +165,11 @@ static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); } else if (level == NIX_TXSCH_LVL_TL2) { + /* configure parent txschq */ + cfg->reg[num_regs] = NIX_AF_TL2X_PARENT(node->schq); + cfg->regval[num_regs] = (u64)hw->tx_link << 16; + num_regs++; + /* configure link cfg */ if (level == pfvf->qos.link_cfg_lvl) { cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); @@ -1633,6 +1638,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force if (!node->is_static) dwrr_del_node = true; + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); /* destroy the leaf node */ otx2_qos_disable_sq(pfvf, qid); otx2_qos_destroy_node(pfvf, node); @@ -1677,9 +1683,6 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force } kfree(new_cfg); - /* update tx_real_queues */ - otx2_qos_update_tx_netdev_queues(pfvf); - return 0; } diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index c5dbae0e513b64..58d572ce08eff5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -256,6 +256,26 @@ int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx) return err; } +static int otx2_qos_nix_npa_ndc_sync(struct otx2_nic *pfvf) +{ + struct ndc_sync_op *req; + int rc; + + mutex_lock(&pfvf->mbox.lock); + + req = otx2_mbox_alloc_msg_ndc_sync_op(&pfvf->mbox); + if (!req) { + mutex_unlock(&pfvf->mbox.lock); + return -ENOMEM; + } + + req->nix_lf_tx_sync = true; + req->npa_lf_sync = true; + rc = otx2_sync_mbox_msg(&pfvf->mbox); + mutex_unlock(&pfvf->mbox.lock); + return rc; +} + void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) { struct otx2_qset *qset = &pfvf->qset; @@ -285,6 +305,8 @@ void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) otx2_qos_sqb_flush(pfvf, sq_idx); otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx)); + /* NIX/NPA NDC sync */ + otx2_qos_nix_npa_ndc_sync(pfvf); otx2_cleanup_tx_cqes(pfvf, cq); mutex_lock(&pfvf->mbox.lock); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 04e08e06f30ff2..7153a71dfc860e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -67,6 +67,8 @@ static int rvu_rep_mcam_flow_init(struct rep_dev *rep) rsp = (struct npc_mcam_alloc_entry_rsp *)otx2_mbox_get_rsp (&priv->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) + goto exit; for (ent = 0; ent < rsp->count; ent++) rep->flow_cfg->flow_ent[ent + allocated] = rsp->entry_list[ent]; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 43197b28b3e745..6c92072b4c2808 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -269,12 +269,8 @@ 
static const char * const mtk_clks_source_name[] = { "ethwarp_wocpu2", "ethwarp_wocpu1", "ethwarp_wocpu0", - "top_usxgmii0_sel", - "top_usxgmii1_sel", "top_sgm0_sel", "top_sgm1_sel", - "top_xfi_phy0_xtal_sel", - "top_xfi_phy1_xtal_sel", "top_eth_gmii_sel", "top_eth_refck_50m_sel", "top_eth_sys_200m_sel", @@ -734,7 +730,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: @@ -757,13 +753,13 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: val |= MTK_QTX_SCH_MAX_RATE_EN | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 6) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); break; default: @@ -871,9 +867,25 @@ static const struct phylink_mac_ops mtk_phylink_ops = { .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; +static void mtk_mdio_config(struct mtk_eth *eth) +{ + u32 val; + + /* Configure MDC Divider */ + val = FIELD_PREP(PPSC_MDC_CFG, eth->mdc_divider); + + /* Configure MDC Turbo Mode */ + if (mtk_is_netsys_v3_or_greater(eth)) + mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); + else + val |= PPSC_MDC_TURBO; + + mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); +} + static int mtk_mdio_init(struct mtk_eth *eth) { - unsigned int max_clk = 2500000, divider; + unsigned int max_clk = 2500000; struct device_node *mii_np; int ret; u32 val; @@ -908,20 +920,9 @@ static int mtk_mdio_init(struct mtk_eth *eth) } max_clk = val; } - 
divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); - - /* Configure MDC Turbo Mode */ - if (mtk_is_netsys_v3_or_greater(eth)) - mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); - - /* Configure MDC Divider */ - val = FIELD_PREP(PPSC_MDC_CFG, divider); - if (!mtk_is_netsys_v3_or_greater(eth)) - val |= PPSC_MDC_TURBO; - mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); - - dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / divider); - + eth->mdc_divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); + mtk_mdio_config(eth); + dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / eth->mdc_divider); ret = of_mdiobus_register(eth->mii_bus, mii_np); err_put_node: @@ -2247,14 +2248,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, ring->data[idx] = new_data; rxd->rxd1 = (unsigned int)dma_addr; release_desc: + if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { + if (unlikely(dma_addr == DMA_MAPPING_ERROR)) + addr64 = FIELD_GET(RX_DMA_ADDR64_MASK, + rxd->rxd2); + else + addr64 = RX_DMA_PREP_ADDR64(dma_addr); + } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) rxd->rxd2 = RX_DMA_LSO; else - rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size); - - if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA) && - likely(dma_addr != DMA_MAPPING_ERROR)) - rxd->rxd2 |= RX_DMA_PREP_ADDR64(dma_addr); + rxd->rxd2 = RX_DMA_PREP_PLEN0(ring->buf_size) | addr64; ring->calc_idx = idx; done++; @@ -3181,11 +3186,19 @@ static int mtk_dma_init(struct mtk_eth *eth) static void mtk_dma_free(struct mtk_eth *eth) { const struct mtk_soc_data *soc = eth->soc; - int i; + int i, j, txqs = 1; + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + txqs = MTK_QDMA_NUM_QUEUES; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + for (j = 0; j < txqs; j++) + netdev_tx_reset_subqueue(eth->netdev[i], j); + } - for (i = 0; i < MTK_MAX_DEVS; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); if (!MTK_HAS_CAPS(soc->caps, 
MTK_SRAM) && eth->scratch_ring) { dma_free_coherent(eth->dma_dev, MTK_QDMA_RING_SIZE * soc->tx.desc_size, @@ -3315,7 +3328,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (mtk_is_netsys_v2_or_greater(eth)) val |= MTK_MUTLI_CNT | MTK_RESV_BUF | MTK_WCOMP_EN | MTK_DMAD_WR_WDONE | - MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN; + MTK_CHK_DDONE_EN; else val |= MTK_RX_BT_32DWORDS; mtk_w32(eth, val, reg_map->qdma.glo_cfg); @@ -3460,9 +3473,6 @@ static int mtk_open(struct net_device *dev) } mtk_gdm_config(eth, target_mac->id, gdm_config); } - /* Reset and enable PSE */ - mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); - mtk_w32(eth, 0, MTK_RST_GL); napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); @@ -3974,6 +3984,10 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) else mtk_hw_reset(eth); + /* No MT7628/88 support yet */ + if (reset && !MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + mtk_mdio_config(eth); + if (mtk_is_netsys_v3_or_greater(eth)) { /* Set FE to PDMAv2 if necessary */ val = mtk_r32(eth, MTK_FE_GLO_MISC); @@ -4034,11 +4048,27 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); if (mtk_is_netsys_v3_or_greater(eth)) { - /* PSE should not drop port1, port8 and port9 packets */ - mtk_w32(eth, 0x00000302, PSE_DROP_CFG); + /* PSE dummy page mechanism */ + mtk_w32(eth, PSE_DUMMY_WORK_GDM(1) | PSE_DUMMY_WORK_GDM(2) | + PSE_DUMMY_WORK_GDM(3) | DUMMY_PAGE_THR, PSE_DUMY_REQ); + + /* PSE free buffer drop threshold */ + mtk_w32(eth, 0x00600009, PSE_IQ_REV(8)); + + /* PSE should not drop port8, port9 and port13 packets from + * WDMA Tx + */ + mtk_w32(eth, 0x00002300, PSE_DROP_CFG); + + /* PSE should drop packets to port8, port9 and port13 on WDMA Rx + * ring full + */ + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(0)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(1)); + mtk_w32(eth, 0x00002300, PSE_PPE_DROP(2)); /* GDM and CDM Threshold */ - mtk_w32(eth, 0x00000707, MTK_CDMW0_THRES); + mtk_w32(eth, 0x08000707, MTK_CDMW0_THRES); 
mtk_w32(eth, 0x00000077, MTK_CDMW1_THRES); /* Disable GDM1 RX CRC stripping */ @@ -4055,7 +4085,7 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) mtk_w32(eth, 0x00000300, PSE_DROP_CFG); /* PSE should drop packets to port 8/9 on WDMA Rx ring full */ - mtk_w32(eth, 0x00000300, PSE_PPE0_DROP); + mtk_w32(eth, 0x00000300, PSE_PPE_DROP(0)); /* PSE Free Queue Flow Control */ mtk_w32(eth, 0x01fa01f4, PSE_FQFC_CFG2); @@ -4718,7 +4748,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) } if (mtk_is_netsys_v3_or_greater(mac->hw) && - MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW_BIT) && + MTK_HAS_CAPS(mac->hw->soc->caps, MTK_ESW) && id == MTK_GMAC1_ID) { mac->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 90a377ab4359ea..88ef2e9c50fc19 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -151,7 +151,15 @@ #define PSE_FQFC_CFG1 0x100 #define PSE_FQFC_CFG2 0x104 #define PSE_DROP_CFG 0x108 -#define PSE_PPE0_DROP 0x110 +#define PSE_PPE_DROP(x) (0x110 + ((x) * 0x4)) + +/* PSE Last FreeQ Page Request Control */ +#define PSE_DUMY_REQ 0x10C +/* PSE_DUMY_REQ is not a typo but actually called like that also in + * MediaTek's datasheet + */ +#define PSE_DUMMY_WORK_GDM(x) BIT(16 + (x)) +#define DUMMY_PAGE_THR 0x1 /* PSE Input Queue Reservation Register*/ #define PSE_IQ_REV(x) (0x140 + (((x) - 1) << 2)) @@ -1271,6 +1279,7 @@ struct mtk_eth { struct clk *clks[MTK_CLK_MAX]; struct mii_bus *mii_bus; + unsigned int mdc_divider; struct work_struct pending_work; unsigned long state; diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 76f202d7f05537..b83886a4112105 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1163,6 +1163,7 @@ static int 
mtk_star_tx_poll(struct napi_struct *napi, int budget) struct net_device *ndev = priv->ndev; unsigned int head = ring->head; unsigned int entry = ring->tail; + unsigned long flags; while (entry != head && count < (MTK_STAR_RING_NUM_DESCS - 1)) { ret = mtk_star_tx_complete_one(priv); @@ -1182,9 +1183,9 @@ static int mtk_star_tx_poll(struct napi_struct *napi, int budget) netif_wake_queue(ndev); if (napi_complete(napi)) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, false, true); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return 0; @@ -1341,16 +1342,16 @@ static int mtk_star_rx(struct mtk_star_priv *priv, int budget) static int mtk_star_rx_poll(struct napi_struct *napi, int budget) { struct mtk_star_priv *priv; + unsigned long flags; int work_done = 0; priv = container_of(napi, struct mtk_star_priv, rx_napi); work_done = mtk_star_rx(priv, budget); - if (work_done < budget) { - napi_complete_done(napi, work_done); - spin_lock(&priv->lock); + if (work_done < budget && napi_complete_done(napi, work_done)) { + spin_lock_irqsave(&priv->lock, flags); mtk_star_enable_dma_irq(priv, true, false); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } return work_done; @@ -1462,6 +1463,8 @@ static __maybe_unused int mtk_star_suspend(struct device *dev) if (netif_running(ndev)) mtk_star_disable(ndev); + netif_device_detach(ndev); + clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); return 0; @@ -1486,6 +1489,8 @@ static __maybe_unused int mtk_star_resume(struct device *dev) clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); } + netif_device_attach(ndev); + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index cd754cd76bde1b..d73a2044dc2662 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -249,7 +249,7 @@ static const struct 
ptp_clock_info mlx4_en_ptp_clock_info = { static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; - u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC; u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 532c7fa94d172a..dbd9482359e1ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -176,6 +176,7 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) priv = ptpsq->txqsq.priv; + rtnl_lock(); mutex_lock(&priv->state_lock); chs = &priv->channels; netdev = priv->netdev; @@ -183,22 +184,19 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - rtnl_lock(); mlx5e_deactivate_priv_channels(priv); - rtnl_unlock(); mlx5e_ptp_close(chs->ptp); err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); - rtnl_lock(); mlx5e_activate_priv_channels(priv); - rtnl_unlock(); /* return carrier back if needed */ if (carrier_ok) netif_carrier_on(netdev); mutex_unlock(&priv->state_lock); + rtnl_unlock(); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 5c762a71818db0..7a18a469961db8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -165,9 +165,6 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, struct flow_match_enc_keyid enc_keyid; void *misc_c, *misc_v; - misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - 
misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) return 0; @@ -182,6 +179,30 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); if (err) return err; + + /* We can't mix custom tunnel headers with symbolic ones and we + * don't have a symbolic field name for GBP, so we use custom + * tunnel headers in this case. We need hardware support to + * match on custom tunnel headers, but we already know it's + * supported because the previous call successfully checked for + * that. + */ + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_5); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_5); + + /* Shift by 8 to account for the reserved bits in the vxlan + * header after the VNI. + */ + MLX5_SET(fte_match_set_misc5, misc_c, tunnel_header_1, + be32_to_cpu(enc_keyid.mask->keyid) << 8); + MLX5_SET(fte_match_set_misc5, misc_v, tunnel_header_1, + be32_to_cpu(enc_keyid.key->keyid) << 8); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + return 0; } /* match on VNI is required */ @@ -195,6 +216,11 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f803e1c9359006..5ce1b463b7a8dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -707,8 +707,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = 
mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as we know this is a page_pool page. + /* No need to check page_pool_page_is_pp() as we + * know this is a page_pool page. */ page_pool_recycle_direct(page->pp, page); } while (++n < num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 2dd842aac6fc47..77f61cd28a7993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -259,8 +259,7 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); - struct xfrm_state *x = sa_entry->x; - struct net_device *netdev; + struct net_device *netdev = sa_entry->dev; struct neighbour *n; u8 addr[ETH_ALEN]; const void *pkey; @@ -270,8 +269,6 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - netdev = x->xso.real_dev; - mlx5_query_mac_address(mdev, addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: @@ -692,17 +689,17 @@ static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) return 0; } -static int mlx5e_xfrm_add_state(struct xfrm_state *x, +static int mlx5e_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; - struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; gfp_t gfp; int err; - priv = netdev_priv(netdev); + priv = netdev_priv(dev); if (!priv->ipsec) return -EOPNOTSUPP; @@ -713,6 +710,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -ENOMEM; sa_entry->x = x; + sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ if 
(x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) @@ -809,7 +807,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return err; } -static void mlx5e_xfrm_del_state(struct xfrm_state *x) +static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -822,7 +820,7 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) WARN_ON(old != sa_entry); } -static void mlx5e_xfrm_free_state(struct xfrm_state *x) +static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -855,8 +853,6 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec *ipsec; struct neighbour *n = ptr; - struct net_device *netdev; - struct xfrm_state *x; unsigned long idx; if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) @@ -876,11 +872,9 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, continue; } - x = sa_entry->x; - netdev = x->xso.real_dev; data = sa_entry->work->data; - neigh_ha_snapshot(data->addr, n, netdev); + neigh_ha_snapshot(data->addr, n, sa_entry->dev); queue_work(ipsec->wq, &sa_entry->work->work); } @@ -996,8 +990,8 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock)); + lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) || + lockdep_is_held(&net->xfrm.xfrm_state_lock)); if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; @@ -1170,7 +1164,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, struct netlink_ext_ack *extack) { - struct net_device 
*netdev = x->xdo.real_dev; + struct net_device *netdev = x->xdo.dev; struct mlx5e_ipsec_pol_entry *pol_entry; struct mlx5e_priv *priv; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index a63c2289f8af92..ffcd0cdeb77544 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -274,6 +274,7 @@ struct mlx5e_ipsec_limits { struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_esn_state esn_state; struct xfrm_state *x; + struct net_device *dev; struct mlx5e_ipsec *ipsec; struct mlx5_accel_esp_xfrm_attrs attrs; void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fdf9e9bb99ace6..6253ea4e99a44f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,7 +43,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3506024c245391..9bd166f489e7ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4349,6 +4349,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported 
in switchdev mode\n"); + features &= ~NETIF_F_HW_MACSEC; + if (netdev->features & NETIF_F_HW_MACSEC) + netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); + return features; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9ba99609999f4f..fef418e1ed1a08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1750,9 +1750,6 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr !list_is_first(&attr->list, &flow->attrs)) return 0; - if (flow_flag_test(flow, SLOW)) - return 0; - esw_attr = attr->esw_attr; if (!esw_attr->split_count || esw_attr->split_count == esw_attr->out_count - 1) @@ -1766,7 +1763,7 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { /* external dest with encap is considered as internal by firmware */ if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK && - !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) + !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) ext_dest = true; else int_dest = true; @@ -2031,9 +2028,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2072,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2577,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, 
tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f849e..4917d185d0c352 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a6a8eea5980ca3..0e3a977d533298 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3533,7 +3533,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3594,6 +3596,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6163bc98d94a94..445301ea70426d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2207,6 +2207,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2271,6 +2272,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2292,7 +2294,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? 
-EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index eb3bd9c7f66ebe..ca9ecec358b20d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -637,10 +637,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -654,7 +650,16 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &inner_ttc_groups[TTC_GROUPS_DEFAULT]; @@ -710,10 +715,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -727,7 +728,16 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? 
&ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &ttc_groups[TTC_GROUPS_DEFAULT]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585ba..9bc9bd83c2324c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index a42f6cd99b7448..5c552b71e371c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -118,8 +118,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, @@ -140,17 +140,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -165,10 +165,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h 
b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4bb..3d9e76c3d42fb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index b5332c54d4fb0f..17b8a3beb11732 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1361,8 +1361,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1392,11 +1392,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. 
- */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1420,6 +1417,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 19dce1ba512d42..32de8bfc7644f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -90,13 +90,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, bwc_matcher->priority = priority; bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, + sizeof(*bwc_matcher->at), GFP_KERNEL); + if (!bwc_matcher->at) + goto free_bwc_matcher_rules; + /* create dummy action template */ bwc_matcher->at[0] = mlx5hws_action_template_create(action_types ? 
action_types : init_action_types); if (!bwc_matcher->at[0]) { mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); - goto free_bwc_matcher_rules; + goto free_bwc_matcher_at_array; } bwc_matcher->num_of_at = 1; @@ -126,6 +132,8 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, mlx5hws_match_template_destroy(bwc_matcher->mt); free_at: mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_at_array: + kfree(bwc_matcher->at); free_bwc_matcher_rules: kfree(bwc_matcher->rules); err: @@ -192,6 +200,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) for (i = 0; i < bwc_matcher->num_of_at; i++) mlx5hws_action_template_destroy(bwc_matcher->at[i]); + kfree(bwc_matcher->at); mlx5hws_match_template_destroy(bwc_matcher->mt); kfree(bwc_matcher->rules); @@ -520,6 +529,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_rule_action rule_actions[]) { enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + void *p; + + if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) { + if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + bwc_matcher->size_of_at_array *= 2; + p = krealloc(bwc_matcher->at, + bwc_matcher->size_of_at_array * + sizeof(*bwc_matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + bwc_matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + bwc_matcher->at = p; + } hws_bwc_rule_actions_to_action_types(rule_actions, action_types); @@ -777,6 +803,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ u32 num_of_rules; + bool need_rehash; int ret = 0; int at_idx; @@ -803,10 +830,14 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, at_idx = bwc_matcher->num_of_at - 1; ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); + 
bwc_matcher->at[at_idx], + &need_rehash); if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. + hws_bwc_unlock_all_queues(ctx); + return ret; + } + if (unlikely(need_rehash)) { + /* The new action template requires more action STEs. * Need to attempt creating new matcher with all * the action templates, including the new one. */ @@ -942,6 +973,7 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ + bool need_rehash; int at_idx, ret; u16 idx; @@ -973,12 +1005,17 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, at_idx = bwc_matcher->num_of_at - 1; ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); + bwc_matcher->at[at_idx], + &need_rehash); if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. + hws_bwc_unlock_all_queues(ctx); + return ret; + } + if (unlikely(need_rehash)) { + /* The new action template requires more action + * STEs. Need to attempt creating new matcher + * with all the action templates, including the + * new one. */ ret = hws_bwc_matcher_rehash_at(bwc_matcher); if (unlikely(ret)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 47f7ed1415535f..bb0cf4b922ceba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -10,9 +10,7 @@ #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 /* Max number of AT attach operations for the same matcher. 
- * When the limit is reached, next attempt to attach new AT - * will result in creation of a new matcher and moving all - * the rules to this matcher. + * When the limit is reached, a larger buffer is allocated for the ATs. */ #define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 @@ -23,10 +21,11 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; - struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; - u32 priority; + struct mlx5hws_action_template **at; u8 num_of_at; + u8 size_of_at_array; u8 size_log; + u32 priority; atomic_t num_of_rules; struct list_head *rules; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c8cc0c8115f537..293459458cc5f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -559,6 +559,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 1b787cd66e6fd3..29c5e00af1aa06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = 
true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index b61864b320536d..37a4497048a6fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -905,18 +905,48 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) return 0; } +static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher) +{ + void *p; + + if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + + matcher->size_of_at_array *= 2; + p = krealloc(matcher->at, + matcher->size_of_at_array * sizeof(*matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + matcher->at = p; + + return 0; +} + int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, - struct mlx5hws_action_template *at) + struct mlx5hws_action_template *at, + bool *need_rehash) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); struct mlx5hws_context *ctx = matcher->tbl->ctx; u32 required_stes; int ret; - if (!matcher->attr.max_num_of_at_attach) { - mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", - matcher->num_of_at); - return -EOPNOTSUPP; + *need_rehash = false; + + if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) { + ret = hws_matcher_grow_at_array(matcher); + if (ret) + return ret; + + if (matcher->col_matcher) { + ret = hws_matcher_grow_at_array(matcher->col_matcher); + if (ret) + return ret; + } } ret = hws_matcher_check_and_process_at(matcher, at); @@ -927,12 +957,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, if (matcher->action_ste.max_stes < required_stes) { mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", required_stes, 
matcher->action_ste.max_stes); - return -ENOMEM; + *need_rehash = true; } matcher->at[matcher->num_of_at] = *at; matcher->num_of_at += 1; - matcher->attr.max_num_of_at_attach -= 1; if (matcher->col_matcher) matcher->col_matcher->num_of_at = matcher->num_of_at; @@ -960,8 +989,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), + matcher->size_of_at_array = + num_of_at + matcher->attr.max_num_of_at_attach; + matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at), GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 020de70270c501..20b32012c418be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -23,6 +23,9 @@ */ #define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 +/* Maximum number of action templates that can be attached to a matcher. 
*/ +#define MLX5HWS_MATCHER_MAX_AT 128 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, @@ -72,6 +75,7 @@ struct mlx5hws_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at; u8 num_of_at; + u8 size_of_at_array; u8 num_of_mt; /* enum mlx5hws_matcher_flags */ u8 flags; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 5121951f2778a8..173f7ed1c17c3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -211,6 +211,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** @@ -399,11 +400,14 @@ int mlx5hws_matcher_destroy(struct mlx5hws_matcher *matcher); * * @matcher: Matcher to attach the action template to. * @at: Action template to be attached to the matcher. + * @need_rehash: Output parameter that tells callers if the matcher needs to be + * rehashed. * * Return: Zero on success, non-zero otherwise. */ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, - struct mlx5hws_action_template *at); + struct mlx5hws_action_template *at, + bool *need_rehash); /** * mlx5hws_matcher_resize_set_target - Link two matchers and enable moving rules. 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 464821dd492dae..a2033837182e86 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3014,6 +3014,9 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, .rif = rif, }; + if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif))) + return 0; + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); if (rms.err) goto err_arp; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 4ca7b99ef13153..de6b1a340f5580 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -154,14 +154,14 @@ struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd); -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd); +int fbnic_fw_request_mbx(struct fbnic_dev *fbd); +void fbnic_fw_free_mbx(struct fbnic_dev *fbd); void fbnic_hwmon_register(struct fbnic_dev *fbd); void fbnic_hwmon_unregister(struct fbnic_dev *fbd); -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd); -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd); +int fbnic_pcs_request_irq(struct fbnic_dev *fbd); +void fbnic_pcs_free_irq(struct fbnic_dev *fbd); void fbnic_napi_name_irqs(struct fbnic_dev *fbd); int fbnic_napi_request_irq(struct fbnic_dev *fbd, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index 3b12a0ab59065e..51bee8072420b7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -796,8 +796,10 @@ enum { /* PUL User Registers */ #define FBNIC_CSR_START_PUL_USER 0x31000 /* CSR section delimiter */ #define FBNIC_PUL_OB_TLP_HDR_AW_CFG 0x3103d /* 
0xc40f4 */ +#define FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME CSR_BIT(18) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG 0x3103e /* 0xc40f8 */ +#define FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH CSR_BIT(19) #define FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME CSR_BIT(18) #define FBNIC_PUL_USER_OB_RD_TLP_CNT_31_0 \ 0x3106e /* 0xc41b8 */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 88db3dacb94059..3d9636a6c968ec 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -17,11 +17,29 @@ static void __fbnic_mbx_wr_desc(struct fbnic_dev *fbd, int mbx_idx, { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + /* Write the upper 32b and then the lower 32b. Doing this the + * FW can then read lower, upper, lower to verify that the state + * of the descriptor wasn't changed mid-transaction. + */ fw_wr32(fbd, desc_offset + 1, upper_32_bits(desc)); fw_wrfl(fbd); fw_wr32(fbd, desc_offset, lower_32_bits(desc)); } +static void __fbnic_mbx_invalidate_desc(struct fbnic_dev *fbd, int mbx_idx, + int desc_idx, u32 desc) +{ + u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); + + /* For initialization we write the lower 32b of the descriptor first. + * This way we can set the state to mark it invalid before we clear the + * upper 32b. 
+ */ + fw_wr32(fbd, desc_offset, desc); + fw_wrfl(fbd); + fw_wr32(fbd, desc_offset + 1, 0); +} + static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) { u32 desc_offset = FBNIC_IPC_MBX(mbx_idx, desc_idx); @@ -33,29 +51,41 @@ static u64 __fbnic_mbx_rd_desc(struct fbnic_dev *fbd, int mbx_idx, int desc_idx) return desc; } -static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_reset_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int desc_idx; + /* Disable DMA transactions from the device, + * and flush any transactions triggered during cleaning + */ + switch (mbx_idx) { + case FBNIC_IPC_MBX_RX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_FLUSH); + break; + case FBNIC_IPC_MBX_TX_IDX: + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_FLUSH); + break; + } + + wrfl(fbd); + /* Initialize first descriptor to all 0s. Doing this gives us a * solid stop for the firmware to hit when it is done looping * through the ring. */ - __fbnic_mbx_wr_desc(fbd, mbx_idx, 0, 0); - - fw_wrfl(fbd); + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, 0, 0); /* We then fill the rest of the ring starting at the end and moving * back toward descriptor 0 with skip descriptors that have no * length nor address, and tell the firmware that they can skip * them and just move past them to the one we initialized to 0. 
*/ - for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) { - __fbnic_mbx_wr_desc(fbd, mbx_idx, desc_idx, - FBNIC_IPC_MBX_DESC_FW_CMPL | - FBNIC_IPC_MBX_DESC_HOST_CMPL); - fw_wrfl(fbd); - } + for (desc_idx = FBNIC_IPC_MBX_DESC_LEN; --desc_idx;) + __fbnic_mbx_invalidate_desc(fbd, mbx_idx, desc_idx, + FBNIC_IPC_MBX_DESC_FW_CMPL | + FBNIC_IPC_MBX_DESC_HOST_CMPL); } void fbnic_mbx_init(struct fbnic_dev *fbd) @@ -76,7 +106,7 @@ void fbnic_mbx_init(struct fbnic_dev *fbd) wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_init_desc_ring(fbd, i); + fbnic_mbx_reset_desc_ring(fbd, i); } static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, @@ -141,7 +171,7 @@ static void fbnic_mbx_clean_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { int i; - fbnic_mbx_init_desc_ring(fbd, mbx_idx); + fbnic_mbx_reset_desc_ring(fbd, mbx_idx); for (i = FBNIC_IPC_MBX_DESC_LEN; i--;) fbnic_mbx_unmap_and_free_msg(fbd, mbx_idx, i); @@ -322,67 +352,41 @@ static int fbnic_fw_xmit_simple_msg(struct fbnic_dev *fbd, u32 msg_type) return err; } -/** - * fbnic_fw_xmit_cap_msg - Allocate and populate a FW capabilities message - * @fbd: FBNIC device structure - * - * Return: NULL on failure to allocate, error pointer on error, or pointer - * to new TLV test message. - * - * Sends a single TLV header indicating the host wants the firmware to - * confirm the capabilities and version. - **/ -static int fbnic_fw_xmit_cap_msg(struct fbnic_dev *fbd) -{ - int err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); - - /* Return 0 if we are not calling this on ASIC */ - return (err == -EOPNOTSUPP) ? 
0 : err; -} - -static void fbnic_mbx_postinit_desc_ring(struct fbnic_dev *fbd, int mbx_idx) +static void fbnic_mbx_init_desc_ring(struct fbnic_dev *fbd, int mbx_idx) { struct fbnic_fw_mbx *mbx = &fbd->mbx[mbx_idx]; - /* This is a one time init, so just exit if it is completed */ - if (mbx->ready) - return; - mbx->ready = true; switch (mbx_idx) { case FBNIC_IPC_MBX_RX_IDX: + /* Enable DMA writes from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, + FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); + /* Make sure we have a page for the FW to write to */ fbnic_mbx_alloc_rx_msgs(fbd); break; case FBNIC_IPC_MBX_TX_IDX: - /* Force version to 1 if we successfully requested an update - * from the firmware. This should be overwritten once we get - * the actual version from the firmware in the capabilities - * request message. - */ - if (!fbnic_fw_xmit_cap_msg(fbd) && - !fbd->fw_cap.running.mgmt.version) - fbd->fw_cap.running.mgmt.version = 1; + /* Enable DMA reads from the device */ + wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, + FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); break; } } -static void fbnic_mbx_postinit(struct fbnic_dev *fbd) +static bool fbnic_mbx_event(struct fbnic_dev *fbd) { - int i; - - /* We only need to do this on the first interrupt following init. + /* We only need to do this on the first interrupt following reset. * this primes the mailbox so that we will have cleared all the * skip descriptors. 
*/ if (!(rd32(fbd, FBNIC_INTR_STATUS(0)) & (1u << FBNIC_FW_MSIX_ENTRY))) - return; + return false; wr32(fbd, FBNIC_INTR_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); - for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) - fbnic_mbx_postinit_desc_ring(fbd, i); + return true; } /** @@ -859,7 +863,7 @@ static void fbnic_mbx_process_rx_msgs(struct fbnic_dev *fbd) void fbnic_mbx_poll(struct fbnic_dev *fbd) { - fbnic_mbx_postinit(fbd); + fbnic_mbx_event(fbd); fbnic_mbx_process_tx_msgs(fbd); fbnic_mbx_process_rx_msgs(fbd); @@ -867,60 +871,97 @@ void fbnic_mbx_poll(struct fbnic_dev *fbd) int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd) { - struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; + unsigned long timeout = jiffies + 10 * HZ + 1; + int err, i; - /* Immediate fail if BAR4 isn't there */ - if (!fbnic_fw_present(fbd)) - return -ENODEV; + do { + if (!time_is_after_jiffies(timeout)) + return -ETIMEDOUT; - tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - while (!tx_mbx->ready && --attempts) { /* Force the firmware to trigger an interrupt response to * avoid the mailbox getting stuck closed if the interrupt * is reset. */ - fbnic_mbx_init_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); + fbnic_mbx_reset_desc_ring(fbd, FBNIC_IPC_MBX_TX_IDX); - msleep(200); + /* Immediate fail if BAR4 went away */ + if (!fbnic_fw_present(fbd)) + return -ENODEV; - fbnic_mbx_poll(fbd); - } + msleep(20); + } while (!fbnic_mbx_event(fbd)); + + /* FW has shown signs of life. Enable DMA and start Tx/Rx */ + for (i = 0; i < FBNIC_IPC_MBX_INDICES; i++) + fbnic_mbx_init_desc_ring(fbd, i); + + /* Request an update from the firmware. This should overwrite + * mgmt.version once we get the actual version from the firmware + * in the capabilities request message. 
+ */ + err = fbnic_fw_xmit_simple_msg(fbd, FBNIC_TLV_MSG_ID_HOST_CAP_REQ); + if (err) + goto clean_mbx; + + /* Use "1" to indicate we entered the state waiting for a response */ + fbd->fw_cap.running.mgmt.version = 1; + + return 0; +clean_mbx: + /* Cleanup Rx buffers and disable mailbox */ + fbnic_mbx_clean(fbd); + return err; +} + +static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) +{ + cmpl_data->result = -EPIPE; + complete(&cmpl_data->done); +} - return attempts ? 0 : -ETIMEDOUT; +static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) +{ + if (fbd->cmpl_data) { + __fbnic_fw_evict_cmpl(fbd->cmpl_data); + fbd->cmpl_data = NULL; + } } void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) { + unsigned long timeout = jiffies + 10 * HZ + 1; struct fbnic_fw_mbx *tx_mbx; - int attempts = 50; - u8 count = 0; - - /* Nothing to do if there is no mailbox */ - if (!fbnic_fw_present(fbd)) - return; + u8 tail; /* Record current Rx stats */ tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; - /* Nothing to do if mailbox never got to ready */ - if (!tx_mbx->ready) - return; + spin_lock_irq(&fbd->fw_tx_lock); + + /* Clear ready to prevent any further attempts to transmit */ + tx_mbx->ready = false; + + /* Read tail to determine the last tail state for the ring */ + tail = tx_mbx->tail; + + /* Flush any completions as we are no longer processing Rx */ + fbnic_mbx_evict_all_cmpl(fbd); + + spin_unlock_irq(&fbd->fw_tx_lock); /* Give firmware time to process packet, - * we will wait up to 10 seconds which is 50 waits of 200ms. + * we will wait up to 10 seconds which is 500 waits of 20ms. 
*/ do { u8 head = tx_mbx->head; - if (head == tx_mbx->tail) + /* Tx ring is empty once head == tail */ + if (head == tail) break; - msleep(200); + msleep(20); fbnic_mbx_process_tx_msgs(fbd); - - count += (tx_mbx->head - head) % FBNIC_IPC_MBX_DESC_LEN; - } while (count < FBNIC_IPC_MBX_DESC_LEN && --attempts); + } while (time_is_after_jiffies(timeout)); } void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 1bbc0e56f3a039..1c88a2bf3a7a78 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -19,69 +19,105 @@ static irqreturn_t fbnic_fw_msix_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } +static int __fbnic_fw_enable_mbx(struct fbnic_dev *fbd, int vector) +{ + int err; + + /* Initialize mailbox and attempt to poll it into ready state */ + fbnic_mbx_init(fbd); + err = fbnic_mbx_poll_tx_ready(fbd); + if (err) { + dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); + return err; + } + + /* Enable interrupt and unmask the vector */ + enable_irq(vector); + fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); + + return 0; +} + /** - * fbnic_fw_enable_mbx - Configure and initialize Firmware Mailbox + * fbnic_fw_request_mbx - Configure and initialize Firmware Mailbox * @fbd: Pointer to device to initialize * - * This function will initialize the firmware mailbox rings, enable the IRQ - * and initialize the communication between the Firmware and the host. The - * firmware is expected to respond to the initialization by sending an - * interrupt essentially notifying the host that it has seen the - * initialization and is now synced up. + * This function will allocate the IRQ and then reinitialize the mailbox + * starting communication between the host and firmware. * * Return: non-zero on failure. 
**/ -int fbnic_fw_enable_mbx(struct fbnic_dev *fbd) +int fbnic_fw_request_mbx(struct fbnic_dev *fbd) { - u32 vector = fbd->fw_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; + + WARN_ON(fbd->fw_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); + if (vector < 0) + return vector; /* Request the IRQ for FW Mailbox vector. */ err = request_threaded_irq(vector, NULL, &fbnic_fw_msix_intr, - IRQF_ONESHOT, dev_name(fbd->dev), fbd); + IRQF_ONESHOT | IRQF_NO_AUTOEN, + dev_name(fbd->dev), fbd); if (err) return err; /* Initialize mailbox and attempt to poll it into ready state */ - fbnic_mbx_init(fbd); - err = fbnic_mbx_poll_tx_ready(fbd); - if (err) { - dev_warn(fbd->dev, "FW mailbox did not enter ready state\n"); + err = __fbnic_fw_enable_mbx(fbd, vector); + if (err) free_irq(vector, fbd); - return err; - } - /* Enable interrupts */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_CLEAR(0), 1u << FBNIC_FW_MSIX_ENTRY); + fbd->fw_msix_vector = vector; - return 0; + return err; } /** - * fbnic_fw_disable_mbx - Disable mailbox and place it in standby state - * @fbd: Pointer to device to disable + * fbnic_fw_disable_mbx - Temporarily place mailbox in standby state + * @fbd: Pointer to device * - * This function will disable the mailbox interrupt, free any messages still - * in the mailbox and place it into a standby state. The firmware is - * expected to see the update and assume that the host is in the reset state. + * Shutdown the mailbox by notifying the firmware to stop sending us logs, mask + * and synchronize the IRQ, and then clean up the rings. 
**/ -void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) +static void fbnic_fw_disable_mbx(struct fbnic_dev *fbd) { - /* Disable interrupt and free vector */ - fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); + /* Disable interrupt and synchronize the IRQ */ + disable_irq(fbd->fw_msix_vector); - /* Free the vector */ - free_irq(fbd->fw_msix_vector, fbd); + /* Mask the vector */ + fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_FW_MSIX_ENTRY); /* Make sure disabling logs message is sent, must be done here to * avoid risk of completing without a running interrupt. */ fbnic_mbx_flush_tx(fbd); - - /* Reset the mailboxes to the initialized state */ fbnic_mbx_clean(fbd); } +/** + * fbnic_fw_free_mbx - Disable mailbox and place it in standby state + * @fbd: Pointer to device to disable + * + * This function will disable the mailbox interrupt, free any messages still + * in the mailbox and place it into a disabled state. The firmware is + * expected to see the update and assume that the host is in the reset state. + **/ +void fbnic_fw_free_mbx(struct fbnic_dev *fbd) +{ + /* Vector has already been freed */ + if (!fbd->fw_msix_vector) + return; + + fbnic_fw_disable_mbx(fbd); + + /* Free the vector */ + free_irq(fbd->fw_msix_vector, fbd); + fbd->fw_msix_vector = 0; +} + static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) { struct fbnic_dev *fbd = data; @@ -101,7 +137,7 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) } /** - * fbnic_pcs_irq_enable - Configure the MAC to enable it to advertise link + * fbnic_pcs_request_irq - Configure the PCS to enable it to advertise link * @fbd: Pointer to device to initialize * * This function provides basic bringup for the MAC/PCS IRQ. For now the IRQ @@ -109,41 +145,61 @@ static irqreturn_t fbnic_pcs_msix_intr(int __always_unused irq, void *data) * * Return: non-zero on failure. 
**/ -int fbnic_pcs_irq_enable(struct fbnic_dev *fbd) +int fbnic_pcs_request_irq(struct fbnic_dev *fbd) { - u32 vector = fbd->pcs_msix_vector; - int err; + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int vector, err; - /* Request the IRQ for MAC link vector. - * Map MAC cause to it, and unmask it + WARN_ON(fbd->pcs_msix_vector); + + vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); + if (vector < 0) + return vector; + + /* Request the IRQ for PCS link vector. + * Map PCS cause to it, and unmask it */ err = request_irq(vector, &fbnic_pcs_msix_intr, 0, fbd->netdev->name, fbd); if (err) return err; + /* Map and enable interrupt, unmask vector after link is configured */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY | FBNIC_INTR_MSIX_CTRL_ENABLE); + fbd->pcs_msix_vector = vector; + return 0; } /** - * fbnic_pcs_irq_disable - Teardown the MAC IRQ to prepare for stopping + * fbnic_pcs_free_irq - Teardown the PCS IRQ to prepare for stopping * @fbd: Pointer to device that is stopping * - * This function undoes the work done in fbnic_pcs_irq_enable and prepares + * This function undoes the work done in fbnic_pcs_request_irq and prepares * the device to no longer receive traffic on the host interface. 
**/ -void fbnic_pcs_irq_disable(struct fbnic_dev *fbd) +void fbnic_pcs_free_irq(struct fbnic_dev *fbd) { + /* Vector has already been freed */ + if (!fbd->pcs_msix_vector) + return; + /* Disable interrupt */ fbnic_wr32(fbd, FBNIC_INTR_MSIX_CTRL(FBNIC_INTR_MSIX_CTRL_PCS_IDX), FBNIC_PCS_MSIX_ENTRY); + fbnic_wrfl(fbd); + + /* Synchronize IRQ to prevent race that would unmask vector */ + synchronize_irq(fbd->pcs_msix_vector); + + /* Mask the vector */ fbnic_wr32(fbd, FBNIC_INTR_MASK_SET(0), 1u << FBNIC_PCS_MSIX_ENTRY); /* Free the vector */ free_irq(fbd->pcs_msix_vector, fbd); + fbd->pcs_msix_vector = 0; } void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) @@ -226,9 +282,6 @@ void fbnic_free_irqs(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); - fbd->pcs_msix_vector = 0; - fbd->fw_msix_vector = 0; - fbd->num_irqs = 0; pci_free_irq_vectors(pdev); @@ -254,8 +307,5 @@ int fbnic_alloc_irqs(struct fbnic_dev *fbd) fbd->num_irqs = num_irqs; - fbd->pcs_msix_vector = pci_irq_vector(pdev, FBNIC_PCS_MSIX_ENTRY); - fbd->fw_msix_vector = pci_irq_vector(pdev, FBNIC_FW_MSIX_ENTRY); - return 0; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index 14291401f46321..dde4a37116e20e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -79,12 +79,6 @@ static void fbnic_mac_init_axi(struct fbnic_dev *fbd) fbnic_init_readrq(fbd, FBNIC_QM_RNI_RBP_CTL, cls, readrq); fbnic_init_mps(fbd, FBNIC_QM_RNI_RDE_CTL, cls, mps); fbnic_init_mps(fbd, FBNIC_QM_RNI_RCM_CTL, cls, mps); - - /* Enable XALI AR/AW outbound */ - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AW_CFG, - FBNIC_PUL_OB_TLP_HDR_AW_CFG_BME); - wr32(fbd, FBNIC_PUL_OB_TLP_HDR_AR_CFG, - FBNIC_PUL_OB_TLP_HDR_AR_CFG_BME); } static void fbnic_mac_init_qm(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 79a01fdd1dd168..2524d9b88d591f 
100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -44,9 +44,10 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto time_stop; - err = fbnic_pcs_irq_enable(fbd); + err = fbnic_pcs_request_irq(fbd); if (err) goto time_stop; + /* Pull the BMC config and initialize the RPC */ fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); @@ -82,7 +83,7 @@ static int fbnic_stop(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); fbnic_down(fbn); - fbnic_pcs_irq_disable(fbn->fbd); + fbnic_pcs_free_irq(fbn->fbd); fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 6cbbc2ee3e1f98..4e8595239c0faa 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -283,7 +283,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto free_irqs; } - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) { dev_err(&pdev->dev, "Firmware mailbox initialization failure\n"); @@ -363,7 +363,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_hwmon_unregister(fbd); fbnic_dbg_fbd_exit(fbd); fbnic_devlink_unregister(fbd); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); fbnic_devlink_free(fbd); @@ -387,7 +387,7 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -420,7 +420,7 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); /* Re-enable mailbox */ - err = fbnic_fw_enable_mbx(fbd); + err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; @@ -438,15 +438,15 @@ static int __fbnic_pm_resume(struct device *dev) if (netif_running(netdev)) { err = 
__fbnic_open(fbn); if (err) - goto err_disable_mbx; + goto err_free_mbx; } rtnl_unlock(); return 0; -err_disable_mbx: +err_free_mbx: rtnl_unlock(); - fbnic_fw_disable_mbx(fbd); + fbnic_fw_free_mbx(fbd); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 23760b613d3ecf..7f36443832ada3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1330,7 +1330,7 @@ static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu) } /* PHY */ -static int lan743x_phy_reset(struct lan743x_adapter *adapter) +static int lan743x_hw_reset_phy(struct lan743x_adapter *adapter) { u32 data; @@ -1346,11 +1346,6 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) 50000, 1000000); } -static int lan743x_phy_init(struct lan743x_adapter *adapter) -{ - return lan743x_phy_reset(adapter); -} - static void lan743x_phy_interface_select(struct lan743x_adapter *adapter) { u32 id_rev; @@ -1815,6 +1810,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, if (nr_frags <= 0) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_first; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); @@ -1884,6 +1880,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, tx->frame_first = 0; tx->frame_data0 = 0; tx->frame_tail = 0; + tx->frame_last = 0; return -ENOMEM; } @@ -1924,16 +1921,18 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, TX_DESC_DATA0_DTYPE_DATA_) { tx->frame_data0 |= TX_DESC_DATA0_LS_; tx->frame_data0 |= TX_DESC_DATA0_IOC_; + tx->frame_last = tx->frame_tail; } - tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - buffer_info = &tx->buffer_info[tx->frame_tail]; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_last]; + buffer_info = 
&tx->buffer_info[tx->frame_last]; buffer_info->skb = skb; if (time_stamp) buffer_info->flags |= TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED; if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; + tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; @@ -3491,6 +3490,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, struct pci_dev *pdev) { struct lan743x_tx *tx; + u32 sgmii_ctl; int index; int ret; @@ -3503,6 +3503,15 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, spin_lock_init(&adapter->eth_syslock_spinlock); mutex_init(&adapter->sgmii_rw_lock); pci11x1x_set_rfe_rd_fifo_threshold(adapter); + sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); + if (adapter->is_sgmii_en) { + sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl &= ~SGMII_CTL_SGMII_POWER_DN_; + } else { + sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; + sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; + } + lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); } else { adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS; adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS; @@ -3520,10 +3529,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, if (ret) return ret; - ret = lan743x_phy_init(adapter); - if (ret) - return ret; - ret = lan743x_ptp_init(adapter); if (ret) return ret; @@ -3554,7 +3559,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) { - u32 sgmii_ctl; int ret; adapter->mdiobus = devm_mdiobus_alloc(&adapter->pdev->dev); @@ -3566,10 +3570,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) adapter->mdiobus->priv = (void *)adapter; if (adapter->is_pci11x1x) { if (adapter->is_sgmii_en) { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl |= SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl &= 
~SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "SGMII operation\n"); adapter->mdiobus->read = lan743x_mdiobus_read_c22; @@ -3580,10 +3580,6 @@ static int lan743x_mdiobus_init(struct lan743x_adapter *adapter) netif_dbg(adapter, drv, adapter->netdev, "lan743x-mdiobus-c45\n"); } else { - sgmii_ctl = lan743x_csr_read(adapter, SGMII_CTL); - sgmii_ctl &= ~SGMII_CTL_SGMII_ENABLE_; - sgmii_ctl |= SGMII_CTL_SGMII_POWER_DN_; - lan743x_csr_write(adapter, SGMII_CTL, sgmii_ctl); netif_dbg(adapter, drv, adapter->netdev, "RGMII operation\n"); // Only C22 support when RGMII I/F @@ -3669,6 +3665,10 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, if (ret) goto cleanup_pci; + ret = lan743x_hw_reset_phy(adapter); + if (ret) + goto cleanup_pci; + ret = lan743x_hardware_init(adapter, pdev); if (ret) goto cleanup_pci; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 7f73d66854bee4..db5fc73e41cca5 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -980,6 +980,7 @@ struct lan743x_tx { u32 frame_first; u32 frame_data0; u32 frame_tail; + u32 frame_last; struct lan743x_tx_buffer_info *buffer_info; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 0af143ec0f8694..7001584f1b7a62 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -353,6 +353,11 @@ static void lan966x_ifh_set_rew_op(void *ifh, u64 rew_op) lan966x_ifh_set(ifh, rew_op, IFH_POS_REW_CMD, IFH_WID_REW_CMD); } +static void lan966x_ifh_set_oam_type(void *ifh, u64 oam_type) +{ + lan966x_ifh_set(ifh, oam_type, IFH_POS_PDU_TYPE, IFH_WID_PDU_TYPE); +} + static void lan966x_ifh_set_timestamp(void *ifh, u64 timestamp) { lan966x_ifh_set(ifh, timestamp, IFH_POS_TIMESTAMP, 
IFH_WID_TIMESTAMP); @@ -380,6 +385,7 @@ static netdev_tx_t lan966x_port_xmit(struct sk_buff *skb, return err; lan966x_ifh_set_rew_op(ifh, LAN966X_SKB_CB(skb)->rew_op); + lan966x_ifh_set_oam_type(ifh, LAN966X_SKB_CB(skb)->pdu_type); lan966x_ifh_set_timestamp(ifh, LAN966X_SKB_CB(skb)->ts_id); } @@ -873,6 +879,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, lan966x_vlan_port_set_vlan_aware(port, 0); lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); lan966x_vlan_port_apply(port); + lan966x_vlan_port_rew_host(port); return 0; } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 1efa584e710777..4f75f068836933 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -75,6 +75,10 @@ #define IFH_REW_OP_ONE_STEP_PTP 0x3 #define IFH_REW_OP_TWO_STEP_PTP 0x4 +#define IFH_PDU_TYPE_NONE 0 +#define IFH_PDU_TYPE_IPV4 7 +#define IFH_PDU_TYPE_IPV6 8 + #define FDMA_RX_DCB_MAX_DBS 1 #define FDMA_TX_DCB_MAX_DBS 1 @@ -254,6 +258,7 @@ struct lan966x_phc { struct lan966x_skb_cb { u8 rew_op; + u8 pdu_type; u16 ts_id; unsigned long jiffies; }; @@ -492,6 +497,7 @@ void lan966x_vlan_port_apply(struct lan966x_port *port); bool lan966x_vlan_cpu_member_cpu_vlan_mask(struct lan966x *lan966x, u16 vid); void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, bool vlan_aware); +void lan966x_vlan_port_rew_host(struct lan966x_port *port); int lan966x_vlan_port_set_vid(struct lan966x_port *port, u16 vid, bool pvid, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index 63905bb5a63a83..87e5e81d40dc68 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -322,34 +322,55 @@ void lan966x_ptp_hwtstamp_get(struct lan966x_port *port, *cfg = phc->hwtstamp_config; } -static int 
lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb) +static void lan966x_ptp_classify(struct lan966x_port *port, struct sk_buff *skb, + u8 *rew_op, u8 *pdu_type) { struct ptp_header *header; u8 msgtype; int type; - if (port->ptp_tx_cmd == IFH_REW_OP_NOOP) - return IFH_REW_OP_NOOP; + if (port->ptp_tx_cmd == IFH_REW_OP_NOOP) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } type = ptp_classify_raw(skb); - if (type == PTP_CLASS_NONE) - return IFH_REW_OP_NOOP; + if (type == PTP_CLASS_NONE) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } header = ptp_parse_header(skb, type); - if (!header) - return IFH_REW_OP_NOOP; + if (!header) { + *rew_op = IFH_REW_OP_NOOP; + *pdu_type = IFH_PDU_TYPE_NONE; + return; + } - if (port->ptp_tx_cmd == IFH_REW_OP_TWO_STEP_PTP) - return IFH_REW_OP_TWO_STEP_PTP; + if (type & PTP_CLASS_L2) + *pdu_type = IFH_PDU_TYPE_NONE; + if (type & PTP_CLASS_IPV4) + *pdu_type = IFH_PDU_TYPE_IPV4; + if (type & PTP_CLASS_IPV6) + *pdu_type = IFH_PDU_TYPE_IPV6; + + if (port->ptp_tx_cmd == IFH_REW_OP_TWO_STEP_PTP) { + *rew_op = IFH_REW_OP_TWO_STEP_PTP; + return; + } /* If it is sync and run 1 step then set the correct operation, * otherwise run as 2 step */ msgtype = ptp_get_msgtype(header, type); - if ((msgtype & 0xf) == 0) - return IFH_REW_OP_ONE_STEP_PTP; + if ((msgtype & 0xf) == 0) { + *rew_op = IFH_REW_OP_ONE_STEP_PTP; + return; + } - return IFH_REW_OP_TWO_STEP_PTP; + *rew_op = IFH_REW_OP_TWO_STEP_PTP; } static void lan966x_ptp_txtstamp_old_release(struct lan966x_port *port) @@ -374,10 +395,12 @@ int lan966x_ptp_txtstamp_request(struct lan966x_port *port, { struct lan966x *lan966x = port->lan966x; unsigned long flags; + u8 pdu_type; u8 rew_op; - rew_op = lan966x_ptp_classify(port, skb); + lan966x_ptp_classify(port, skb, &rew_op, &pdu_type); LAN966X_SKB_CB(skb)->rew_op = rew_op; + LAN966X_SKB_CB(skb)->pdu_type = pdu_type; if (rew_op != IFH_REW_OP_TWO_STEP_PTP) return 0; diff --git 
a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c index 1c88120eb291a2..bcb4db76b75cd5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_switchdev.c @@ -297,6 +297,7 @@ static void lan966x_port_bridge_leave(struct lan966x_port *port, lan966x_vlan_port_set_vlan_aware(port, false); lan966x_vlan_port_set_vid(port, HOST_PVID, false, false); lan966x_vlan_port_apply(port); + lan966x_vlan_port_rew_host(port); } int lan966x_port_changeupper(struct net_device *dev, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c index fa34a739c748e1..7da22520724ce2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vlan.c @@ -149,6 +149,27 @@ void lan966x_vlan_port_set_vlan_aware(struct lan966x_port *port, port->vlan_aware = vlan_aware; } +/* When the interface is in host mode, the interface should not be vlan aware + * but it should insert all the tags that it gets from the network stack. + * The tags are not in the data of the frame but actually in the skb and the ifh + * is configured already to get this tag. So what we need to do is to update the + * rewriter to insert the vlan tag for all frames which have a vlan tag + * different than 0. 
+ */ +void lan966x_vlan_port_rew_host(struct lan966x_port *port) +{ + struct lan966x *lan966x = port->lan966x; + u32 val; + + /* Tag all frames except when VID=0*/ + val = REW_TAG_CFG_TAG_CFG_SET(2); + + /* Update only some bits in the register */ + lan_rmw(val, + REW_TAG_CFG_TAG_CFG, + lan966x, REW_TAG_CFG(port->chip_port)); +} + void lan966x_vlan_port_apply(struct lan966x_port *port) { struct lan966x *lan966x = port->lan966x; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ef93df52088710..08bee56aea35f3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -830,6 +830,7 @@ EXPORT_SYMBOL(ocelot_vlan_prepare); int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, bool untagged) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; /* Ignore VID 0 added to our RX filter by the 8021q module, since @@ -849,6 +850,11 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, ocelot_bridge_vlan_find(ocelot, vid)); if (err) return err; + } else if (ocelot_port->pvid_vlan && + ocelot_bridge_vlan_find(ocelot, vid) == ocelot_port->pvid_vlan) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; } /* Untagged egress vlan clasification */ diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index 671af5d4c5d25c..9e7c285eaa6bca 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -266,17 +266,17 @@ static void set_sha2_512hmac(struct nfp_ipsec_cfg_add_sa *cfg, int *trunc_len) } } -static int nfp_net_xfrm_add_state(struct xfrm_state *x, +static int nfp_net_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xso.real_dev; struct nfp_ipsec_cfg_mssg msg = {}; int i, key_len, trunc_len, err = 0; struct nfp_ipsec_cfg_add_sa 
*cfg; struct nfp_net *nn; unsigned int saidx; - nn = netdev_priv(netdev); + nn = netdev_priv(dev); cfg = &msg.cfg_add_sa; /* General */ @@ -546,17 +546,16 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x, return 0; } -static void nfp_net_xfrm_del_state(struct xfrm_state *x) +static void nfp_net_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct nfp_ipsec_cfg_mssg msg = { .cmd = NFP_IPSEC_CFG_MSSG_INV_SA, .sa_idx = x->xso.offload_handle - 1, }; - struct net_device *netdev = x->xso.real_dev; struct nfp_net *nn; int err; - nn = netdev_priv(netdev); + nn = netdev_priv(dev); err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, sizeof(msg), nfp_net_ipsec_cfg); if (err) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 99df00c30b8c6c..b5d744d2586f72 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -203,7 +203,7 @@ static struct pci_driver qede_pci_driver = { }; static struct qed_eth_cb_ops qede_ll_ops = { - { + .common = { #ifdef CONFIG_RFS_ACCEL .arfs_filter_op = qede_arfs_filter_op, #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 28d24d59efb84f..d57b976b904095 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -1484,8 +1484,11 @@ static int qlcnic_sriov_channel_cfg_cmd(struct qlcnic_adapter *adapter, u8 cmd_o } cmd_op = (cmd.rsp.arg[0] & 0xff); - if (cmd.rsp.arg[0] >> 25 == 2) - return 2; + if (cmd.rsp.arg[0] >> 25 == 2) { + ret = 2; + goto out; + } + if (cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) set_bit(QLC_BC_VF_STATE, &vf->state); else diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 2aacc1996796db..55b8d36661530c 100644 --- 
a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1925,8 +1925,8 @@ static u16 rtase_calc_time_mitigation(u32 time_us) time_us = min_t(int, time_us, RTASE_MITI_MAX_TIME); - msb = fls(time_us); - if (msb >= RTASE_MITI_COUNT_BIT_NUM) { + if (time_us > RTASE_MITI_TIME_COUNT_MASK) { + msb = fls(time_us); time_unit = msb - RTASE_MITI_COUNT_BIT_NUM; time_count = time_us >> (msb - RTASE_MITI_COUNT_BIT_NUM); } else { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 85723a78793ab6..6c7e8655a7eb92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -964,7 +964,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY * address. No need to mask it again. */ - reg |= 1 << H3_EPHY_ADDR_SHIFT; + reg |= ret << H3_EPHY_ADDR_SHIFT; } else { /* For SoCs without internal PHY the PHY selection bit should be * set to 0 (external PHY). 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index 967a16212faf00..0c011a47d5a3e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -320,8 +320,8 @@ enum rtc_control { /* PTP and timestamping registers */ -#define GMAC3_X_ATSNS GENMASK(19, 16) -#define GMAC3_X_ATSNS_SHIFT 16 +#define GMAC3_X_ATSNS GENMASK(29, 25) +#define GMAC3_X_ATSNS_SHIFT 25 #define GMAC_PTP_TCR_ATSFC BIT(24) #define GMAC_PTP_TCR_ATSEN0 BIT(25) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index a8b901cdf5cbb3..56b76aaa58f04a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -553,7 +553,7 @@ void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + GMAC_PTP_ATNR); - ns += readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + GMAC_PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c index c9693f77e1f61f..ac6f2e3a3fcd2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c @@ -32,6 +32,11 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg, int i, ret = 0; u32 ctrl; + if (!ptp_rate) { + netdev_warn(priv->dev, "Invalid PTP rate"); + return -EINVAL; + } + ret |= est_write(est_addr, EST_BTR_LOW, cfg->btr[0], false); ret |= est_write(est_addr, EST_BTR_HIGH, cfg->btr[1], false); ret |= est_write(est_addr, EST_TER, cfg->ter, false); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 0f59aa98260404..e2840fa241f291 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -222,7 +222,7 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) u64 ns; ns = readl(ptpaddr + PTP_ATNR); - ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; + ns += (u64)readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; *ptp_time = ns; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59d07d0d3369db..3a049a158ea111 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -803,6 +803,11 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) return -EOPNOTSUPP; + if (!priv->plat->clk_ptp_rate) { + netdev_err(priv->dev, "Invalid PTP clock rate"); + return -EINVAL; + } + stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); priv->systime_flags = systime_flags; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c73eff6a56b87a..15205a47cafc27 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -430,6 +430,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; struct stmmac_dma_cfg *dma_cfg; + static int bus_id = -ENODEV; int phy_mode; void *ret; int rc; @@ -465,8 +466,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) of_property_read_u32(np, "max-speed", &plat->max_speed); plat->bus_id = of_alias_get_id(np, "ethernet"); - if (plat->bus_id < 0) - plat->bus_id = 0; + if (plat->bus_id < 0) { + if (bus_id < 0) + bus_id = of_alias_get_highest_id("ethernet"); + /* No ethernet alias found, init at -1 so first bus_id is 0 */ + if (bus_id < 0) + bus_id = -1; + plat->bus_id = ++bus_id; + } /* Default to phy auto-detection */ 
plat->phy_addr = -1; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 429b2d357813c8..3767ba495e78d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -317,7 +317,7 @@ void stmmac_ptp_register(struct stmmac_priv *priv) /* Calculate the clock domain crossing (CDC) error if necessary */ priv->plat->cdc_error_adj = 0; - if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) + if (priv->plat->has_gmac4) priv->plat->cdc_error_adj = (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate; /* Update the ptp clock parameters based on feature discovery, when diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 73c07f10f053a4..379b6e90121d9f 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9064,6 +9064,8 @@ static void niu_try_msix(struct niu *np, u8 *ldg_num_map) msi_vec[i].entry = i; } + pdev->dev_flags |= PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST; + num_irqs = pci_enable_msix_range(pdev, msi_vec, 1, num_irqs); if (num_irqs < 0) { np->flags &= ~NIU_FLAGS_MSIX; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c9fd34787c9986..30665ffe78cf91 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2666,7 +2666,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) of_property_read_bool(port_np, "ti,mac-only"); /* get phy/link info */ - port->slave.port_np = port_np; + port->slave.port_np = of_node_get(port_np); ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", @@ -2685,7 +2685,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) port->slave.mac_addr); if (!is_valid_ether_addr(port->slave.mac_addr)) { eth_random_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC 
address\n"); + dev_info(dev, "Use random MAC address\n"); } } @@ -2720,6 +2720,17 @@ static void am65_cpsw_nuss_phylink_cleanup(struct am65_cpsw_common *common) } } +static void am65_cpsw_remove_dt(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + of_node_put(port->slave.port_np); + } +} + static int am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) { @@ -3622,6 +3633,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) am65_cpsw_nuss_cleanup_ndev(common); am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); + am65_cpsw_remove_dt(common); err_of_clear: if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); @@ -3661,6 +3673,7 @@ static void am65_cpsw_nuss_remove(struct platform_device *pdev) am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); am65_cpsw_disable_serdes_phy(common); + am65_cpsw_remove_dt(common); if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index b4a34c57b7b482..2a1c43316f462b 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -412,6 +412,22 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, int ret; u64 cmp; + if (!on) { + /* Disable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), 0); + + /* clear CMP regs */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); + + /* Disable sync */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); + + return 0; + } + /* Calculate width of the signal for PPS/PEROUT handling */ ts.tv_sec = req->on.sec; ts.tv_nsec = req->on.nsec; @@ -430,64 +446,39 @@ static int 
icss_iep_perout_enable_hw(struct icss_iep *iep, if (ret) return ret; - if (on) { - /* Configure CMP */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); - /* Configure SYNC, based on req on width */ - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - } + /* Configure CMP */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); + /* Configure SYNC, based on req on width */ + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } else { - if (on) { - u64 start_ns; - - iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + - req->period.nsec; - start_ns = ((u64)req->period.sec * NSEC_PER_SEC) - + req->period.nsec; - icss_iep_update_to_next_boundary(iep, start_ns); - - 
regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - /* Enable Sync in single shot mode */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, - IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear CMP regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - - /* Disable sync */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); - } + u64 start_ns; + + iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + start_ns = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + icss_iep_update_to_next_boundary(iep, start_ns); + + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + /* Enable Sync in single shot mode */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, + IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } return 0; @@ -498,11 +489,21 @@ static int icss_iep_perout_enable(struct icss_iep *iep, { int ret = 0; + if (!on) + goto disable; + /* Reject requests with unsupported flags */ if (req->flags & ~(PTP_PEROUT_DUTY_CYCLE | PTP_PEROUT_PHASE)) return -EOPNOTSUPP; + /* Set default "on" time (1ms) for the signal if not passed by the app */ + if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { + req->on.sec = 0; + req->on.nsec = NSEC_PER_MSEC; + } + +disable: mutex_lock(&iep->ptp_clk_mutex); if (iep->pps_enabled) { @@ -513,12 
+514,6 @@ static int icss_iep_perout_enable(struct icss_iep *iep, if (iep->perout_enabled == !!on) goto exit; - /* Set default "on" time (1ms) for the signal if not passed by the app */ - if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { - req->on.sec = 0; - req->on.nsec = NSEC_PER_MSEC; - } - ret = icss_iep_perout_enable_hw(iep, req, on); if (!ret) iep->perout_enabled = !!on; diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 14002b0264528c..d88a0180294e0f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -187,7 +187,6 @@ int emac_tx_complete_packets(struct prueth_emac *emac, int chn, xdp_return_frame(xdpf); break; default: - netdev_err(ndev, "tx_complete: invalid swdata type %d\n", swdata->type); prueth_xmit_free(tx_chn, desc_tx); ndev->stats.tx_dropped++; continue; @@ -567,6 +566,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, { struct cppi5_host_desc_t *first_desc; struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; struct prueth_tx_chn *tx_chn; dma_addr_t desc_dma, buf_dma; struct prueth_swdata *swdata; @@ -583,7 +583,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (!first_desc) { netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n"); - goto drop_free_descs; /* drop */ + return ICSSG_XDP_CONSUMED; /* drop */ } if (page) { /* already DMA mapped by page_pool */ @@ -620,12 +620,17 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, swdata->data.xdpf = xdpf; } + /* Report BQL before sending the packet */ + netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); + netdev_tx_sent_queue(netif_txq, xdpf->len); + cppi5_hdesc_set_pktlen(first_desc, xdpf->len); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, first_desc); ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "xdp tx: push failed: %d\n", ret); + 
netdev_tx_completed_queue(netif_txq, 1, xdpf->len); goto drop_free_descs; } @@ -650,6 +655,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, struct page *page, u32 *len) { struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 pkt_len = *len; @@ -669,10 +676,15 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, goto drop; } - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + __netif_tx_lock(netif_txq, cpu); result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); - if (result == ICSSG_XDP_CONSUMED) + __netif_tx_unlock(netif_txq); + if (result == ICSSG_XDP_CONSUMED) { + ndev->stats.tx_dropped++; goto drop; + } dev_sw_netstats_rx_add(ndev, xdpf->len); return result; @@ -977,6 +989,7 @@ enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev ret = k3_udma_glue_push_tx_chn(tx_chn->tx_chn, first_desc, desc_dma); if (ret) { netdev_err(ndev, "tx: push failed: %d\n", ret); + netdev_tx_completed_queue(netif_txq, 1, pkt_len); goto drop_free_descs; } @@ -1215,9 +1228,6 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, prueth_rx_cleanup); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); - - page_pool_destroy(chn->pg_pool); - chn->pg_pool = NULL; } EXPORT_SYMBOL_GPL(prueth_reset_rx_chan); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 443f90fa65575a..86fc1278127c74 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1075,17 +1075,21 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame { struct prueth_emac *emac = netdev_priv(dev); struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; 
unsigned int q_idx; int nxmit = 0; u32 err; int i; - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; + __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); @@ -1095,6 +1099,7 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame } nxmit++; } + __netif_tx_unlock(netif_txq); return nxmit; } @@ -1109,11 +1114,6 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->prog; - xdp_features_t val; - - val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - xdp_set_features_flag(emac->ndev, val); if (!emac->xdpi.prog && !prog) return 0; @@ -1291,6 +1291,10 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; + xdp_set_features_flag(ndev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 6f0edae38ea242..172ae38381b453 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -29,6 +29,14 @@ void emac_update_hardware_stats(struct prueth_emac *emac) spin_lock(&prueth->stats_lock); for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) { + /* In MII mode TX lines are swapped inside ICSSG, so read Tx stats + * from slice1 for port0 and slice0 for port1 to get accurate Tx + * stats for a given port + */ + 
if (emac->phy_if == PHY_INTERFACE_MODE_MII && + icssg_all_miig_stats[i].offset >= ICSSG_TX_PACKET_OFFSET && + icssg_all_miig_stats[i].offset <= ICSSG_TX_BYTE_OFFSET) + base = stats_base[slice ^ 1]; regmap_read(prueth->miig_rt, base + icssg_all_miig_stats[i].offset, &val); diff --git a/drivers/net/ethernet/vertexcom/mse102x.c b/drivers/net/ethernet/vertexcom/mse102x.c index 89dc4c401a8de4..e4d993f3137407 100644 --- a/drivers/net/ethernet/vertexcom/mse102x.c +++ b/drivers/net/ethernet/vertexcom/mse102x.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -33,7 +34,7 @@ #define CMD_CTR (0x2 << CMD_SHIFT) #define CMD_MASK GENMASK(15, CMD_SHIFT) -#define LEN_MASK GENMASK(CMD_SHIFT - 1, 0) +#define LEN_MASK GENMASK(CMD_SHIFT - 2, 0) #define DET_CMD_LEN 4 #define DET_SOF_LEN 2 @@ -262,7 +263,7 @@ static int mse102x_tx_frame_spi(struct mse102x_net *mse, struct sk_buff *txp, } static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, - unsigned int frame_len) + unsigned int frame_len, bool drop) { struct mse102x_net_spi *mses = to_mse102x_spi(mse); struct spi_transfer *xfer = &mses->spi_xfer; @@ -280,6 +281,9 @@ static int mse102x_rx_frame_spi(struct mse102x_net *mse, u8 *buff, netdev_err(mse->ndev, "%s: spi_sync() failed: %d\n", __func__, ret); mse->stats.xfer_err++; + } else if (drop) { + netdev_dbg(mse->ndev, "%s: Drop frame\n", __func__); + ret = -EINVAL; } else if (*sof != cpu_to_be16(DET_SOF)) { netdev_dbg(mse->ndev, "%s: SPI start of frame is invalid (0x%04x)\n", __func__, *sof); @@ -307,6 +311,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) struct sk_buff *skb; unsigned int rxalign; unsigned int rxlen; + bool drop = false; __be16 rx = 0; u16 cmd_resp; u8 *rxpkt; @@ -329,7 +334,8 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) net_dbg_ratelimited("%s: Unexpected response (0x%04x)\n", __func__, cmd_resp); mse->stats.invalid_rts++; - return; + drop = true; + goto drop; } 
net_dbg_ratelimited("%s: Unexpected response to first CMD\n", @@ -337,12 +343,20 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) } rxlen = cmd_resp & LEN_MASK; - if (!rxlen) { - net_dbg_ratelimited("%s: No frame length defined\n", __func__); + if (rxlen < ETH_ZLEN || rxlen > VLAN_ETH_FRAME_LEN) { + net_dbg_ratelimited("%s: Invalid frame length: %d\n", __func__, + rxlen); mse->stats.invalid_len++; - return; + drop = true; } + /* In case of a invalid CMD_RTS, the frame must be consumed anyway. + * So assume the maximum possible frame length. + */ +drop: + if (drop) + rxlen = VLAN_ETH_FRAME_LEN; + rxalign = ALIGN(rxlen + DET_SOF_LEN + DET_DFT_LEN, 4); skb = netdev_alloc_skb_ip_align(mse->ndev, rxalign); if (!skb) @@ -353,7 +367,7 @@ static void mse102x_rx_pkt_spi(struct mse102x_net *mse) * They are copied, but ignored. */ rxpkt = skb_put(skb, rxlen) - DET_SOF_LEN; - if (mse102x_rx_frame_spi(mse, rxpkt, rxlen)) { + if (mse102x_rx_frame_spi(mse, rxpkt, rxlen, drop)) { mse->ndev->stats.rx_errors++; dev_kfree_skb(skb); return; @@ -509,6 +523,7 @@ static irqreturn_t mse102x_irq(int irq, void *_mse) static int mse102x_net_open(struct net_device *ndev) { struct mse102x_net *mse = netdev_priv(ndev); + struct mse102x_net_spi *mses = to_mse102x_spi(mse); int ret; ret = request_threaded_irq(ndev->irq, NULL, mse102x_irq, IRQF_ONESHOT, @@ -524,6 +539,13 @@ static int mse102x_net_open(struct net_device *ndev) netif_carrier_on(ndev); + /* The SPI interrupt can stuck in case of pending packet(s). + * So poll for possible packet(s) to re-arm the interrupt. 
+ */ + mutex_lock(&mses->lock); + mse102x_rx_pkt_spi(mse); + mutex_unlock(&mses->lock); + netif_dbg(mse, ifup, ndev, "network device up\n"); return 0; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index aed45abafb1b79..490d34233d38c5 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -434,14 +434,20 @@ static int wx_host_interface_command_r(struct wx *wx, u32 *buffer, wr32m(wx, WX_SW2FW_MBOX_CMD, WX_SW2FW_MBOX_CMD_VLD, WX_SW2FW_MBOX_CMD_VLD); /* polling reply from FW */ - err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 1000, 50000, - true, wx, buffer, send_cmd); + err = read_poll_timeout(wx_poll_fw_reply, reply, reply, 2000, + timeout * 1000, true, wx, buffer, send_cmd); if (err) { wx_err(wx, "Polling from FW messages timeout, cmd: 0x%x, index: %d\n", send_cmd, wx->swfw_index); goto rel_out; } + if (hdr->cmd_or_resp.ret_status == 0x80) { + wx_err(wx, "Unknown FW command: 0x%x\n", send_cmd); + err = -EINVAL; + goto rel_out; + } + /* expect no reply from FW then return */ if (!return_data) goto rel_out; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 00b0b318df27e8..e69eaa65e0de85 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -310,7 +310,8 @@ static bool wx_alloc_mapped_page(struct wx_ring *rx_ring, return true; page = page_pool_dev_alloc_pages(rx_ring->page_pool); - WARN_ON(!page); + if (unlikely(!page)) + return false; dma = page_pool_get_dma_addr(page); bi->page_dma = dma; @@ -546,7 +547,8 @@ static void wx_rx_checksum(struct wx_ring *ring, return; /* Hardware can't guarantee csum if IPv6 Dest Header found */ - if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc)) + if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && + wx_test_staterr(rx_desc, WX_RXD_STAT_IPV6EX)) return; /* if L4 checksum error */ diff --git 
a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5b230ecbbabb51..4c545b2aa997cb 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -513,6 +513,7 @@ enum WX_MSCA_CMD_value { #define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */ #define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */ #define WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/ +#define WX_RXD_STAT_IPV6EX BIT(12) /* IPv6 Dest Header */ #define WX_RXD_STAT_TS BIT(14) /* IEEE1588 Time Stamp */ #define WX_RXD_ERR_OUTERIPER BIT(26) /* CRC IP Header error */ @@ -589,8 +590,6 @@ enum wx_l2_ptypes { #define WX_RXD_PKTTYPE(_rxd) \ ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF) -#define WX_RXD_IPV6EX(_rxd) \ - ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1) /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index a6159214ec0a98..91b3055a5a9f59 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -625,7 +625,7 @@ static int ngbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = ngbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, NGBE_SPI_ILDR_STATUS_PERST); @@ -719,6 +719,7 @@ static int ngbe_probe(struct pci_dev *pdev, err_clear_interrupt_scheme: wx_clear_interrupt_scheme(wx); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 4b9921b7bb1125..a054b259d435dd 
100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -99,9 +99,15 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) } local_buffer = eeprom_ptrs; - for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) + for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) { + if (wx->mac.type == wx_mac_aml) { + if (i >= TXGBE_EEPROM_I2C_SRART_PTR && + i < TXGBE_EEPROM_I2C_END_PTR) + local_buffer[i] = 0xffff; + } if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) *checksum += local_buffer[i]; + } kvfree(eeprom_ptrs); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a2e245e3b01683..38206a46693bcd 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -611,7 +611,7 @@ static int txgbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = txgbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_PERST); @@ -769,6 +769,7 @@ static int txgbe_probe(struct pci_dev *pdev, wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 9c1c26234cad95..f423012dec2256 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -158,6 +158,8 @@ #define TXGBE_EEPROM_VERSION_L 0x1D #define TXGBE_EEPROM_VERSION_H 0x1E #define TXGBE_ISCSI_BOOT_CONFIG 0x07 +#define TXGBE_EEPROM_I2C_SRART_PTR 0x580 +#define TXGBE_EEPROM_I2C_END_PTR 0x800 #define TXGBE_MAX_MSIX_VECTORS 64 #define TXGBE_MAX_FDIR_INDICES 63 diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c 
b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 054abf283ab33e..5f912b27bfd7fc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -880,7 +880,7 @@ static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) dev_consume_skb_any(skbuf_dma->skb); netif_txq_completed_wake(txq, 1, len, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), - 2 * MAX_SKB_FRAGS); + 2); } /** @@ -914,7 +914,7 @@ axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) dma_dev = lp->tx_chan->device; sg_len = skb_shinfo(skb)->nr_frags + 1; - if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= 1) { netif_stop_queue(ndev); if (net_ratelimit()) netdev_warn(ndev, "TX ring unexpectedly full\n"); @@ -964,7 +964,7 @@ axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) txq = skb_get_tx_queue(lp->ndev, skb); netdev_tx_sent_queue(txq, skb->len); netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), - MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); + 1, 2); dmaengine_submit(dma_tx_desc); dma_async_issue_pending(lp->tx_chan); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 70f7cb383228ea..cb6f5482d203e1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -158,7 +158,6 @@ struct hv_netvsc_packet { u8 cp_partial; /* partial copy into send buffer */ u8 rmsg_size; /* RNDIS header and PPI size */ - u8 rmsg_pgcnt; /* page count of RNDIS header and PPI */ u8 page_buf_cnt; u16 q_idx; @@ -893,6 +892,18 @@ struct nvsp_message { sizeof(struct nvsp_message)) #define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) +/* Maximum # of contiguous data ranges that can make up a trasmitted packet. 
+ * Typically it's the max SKB fragments plus 2 for the rndis packet and the + * linear portion of the SKB. But if MAX_SKB_FRAGS is large, the value may + * need to be limited to MAX_PAGE_BUFFER_COUNT, which is the max # of entries + * in a GPA direct packet sent to netvsp over VMBus. + */ +#if MAX_SKB_FRAGS + 2 < MAX_PAGE_BUFFER_COUNT +#define MAX_DATA_RANGES (MAX_SKB_FRAGS + 2) +#else +#define MAX_DATA_RANGES MAX_PAGE_BUFFER_COUNT +#endif + /* Estimated requestor size: * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d6f5b9ea3109d2..720104661d7f24 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -953,8 +953,7 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + pend_size; int i; u32 padding = 0; - u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : - packet->page_buf_cnt; + u32 page_count = packet->cp_partial ? 1 : packet->page_buf_cnt; u32 remain; /* Add padding */ @@ -1055,6 +1054,42 @@ static int netvsc_dma_map(struct hv_device *hv_dev, return 0; } +/* Build an "array" of mpb entries describing the data to be transferred + * over VMBus. After the desc header fields, each "array" entry is variable + * size, and each entry starts after the end of the previous entry. The + * "offset" and "len" fields for each entry imply the size of the entry. + * + * The pfns are in HV_HYP_PAGE_SIZE, because all communication with Hyper-V + * uses that granularity, even if the system page size of the guest is larger. + * Each entry in the input "pb" array must describe a contiguous range of + * guest physical memory so that the pfns are sequential if the range crosses + * a page boundary. The offset field must be < HV_HYP_PAGE_SIZE. 
+ */ +static inline void netvsc_build_mpb_array(struct hv_page_buffer *pb, + u32 page_buffer_count, + struct vmbus_packet_mpb_array *desc, + u32 *desc_size) +{ + struct hv_mpb_array *mpb_entry = &desc->range; + int i, j; + + for (i = 0; i < page_buffer_count; i++) { + u32 offset = pb[i].offset; + u32 len = pb[i].len; + + mpb_entry->offset = offset; + mpb_entry->len = len; + + for (j = 0; j < HVPFN_UP(offset + len); j++) + mpb_entry->pfn_array[j] = pb[i].pfn + j; + + mpb_entry = (struct hv_mpb_array *)&mpb_entry->pfn_array[j]; + } + + desc->rangecount = page_buffer_count; + *desc_size = (char *)mpb_entry - (char *)desc; +} + static inline int netvsc_send_pkt( struct hv_device *device, struct hv_netvsc_packet *packet, @@ -1097,8 +1132,11 @@ static inline int netvsc_send_pkt( packet->dma_range = NULL; if (packet->page_buf_cnt) { + struct vmbus_channel_packet_page_buffer desc; + u32 desc_size; + if (packet->cp_partial) - pb += packet->rmsg_pgcnt; + pb++; ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb); if (ret) { @@ -1106,11 +1144,12 @@ static inline int netvsc_send_pkt( goto exit; } - ret = vmbus_sendpacket_pagebuffer(out_channel, - pb, packet->page_buf_cnt, - &nvmsg, sizeof(nvmsg), - req_id); - + netvsc_build_mpb_array(pb, packet->page_buf_cnt, + (struct vmbus_packet_mpb_array *)&desc, + &desc_size); + ret = vmbus_sendpacket_mpb_desc(out_channel, + (struct vmbus_packet_mpb_array *)&desc, + desc_size, &nvmsg, sizeof(nvmsg), req_id); if (ret) netvsc_dma_unmap(ndev_ctx->device_ctx, packet); } else { @@ -1259,7 +1298,7 @@ int netvsc_send(struct net_device *ndev, packet->send_buf_index = section_index; if (packet->cp_partial) { - packet->page_buf_cnt -= packet->rmsg_pgcnt; + packet->page_buf_cnt--; packet->total_data_buflen = msd_len + packet->rmsg_size; } else { packet->page_buf_cnt = 0; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c51b318b8a72e4..d8b169ac0343c5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ 
b/drivers/net/hyperv/netvsc_drv.c @@ -326,43 +326,10 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, return txq; } -static u32 fill_pg_buf(unsigned long hvpfn, u32 offset, u32 len, - struct hv_page_buffer *pb) -{ - int j = 0; - - hvpfn += offset >> HV_HYP_PAGE_SHIFT; - offset = offset & ~HV_HYP_PAGE_MASK; - - while (len > 0) { - unsigned long bytes; - - bytes = HV_HYP_PAGE_SIZE - offset; - if (bytes > len) - bytes = len; - pb[j].pfn = hvpfn; - pb[j].offset = offset; - pb[j].len = bytes; - - offset += bytes; - len -= bytes; - - if (offset == HV_HYP_PAGE_SIZE && len) { - hvpfn++; - offset = 0; - j++; - } - } - - return j + 1; -} - static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, struct hv_netvsc_packet *packet, struct hv_page_buffer *pb) { - u32 slots_used = 0; - char *data = skb->data; int frags = skb_shinfo(skb)->nr_frags; int i; @@ -371,28 +338,27 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb, * 2. skb linear data * 3. 
skb fragment data */ - slots_used += fill_pg_buf(virt_to_hvpfn(hdr), - offset_in_hvpage(hdr), - len, - &pb[slots_used]); + pb[0].offset = offset_in_hvpage(hdr); + pb[0].len = len; + pb[0].pfn = virt_to_hvpfn(hdr); packet->rmsg_size = len; - packet->rmsg_pgcnt = slots_used; - slots_used += fill_pg_buf(virt_to_hvpfn(data), - offset_in_hvpage(data), - skb_headlen(skb), - &pb[slots_used]); + pb[1].offset = offset_in_hvpage(skb->data); + pb[1].len = skb_headlen(skb); + pb[1].pfn = virt_to_hvpfn(skb->data); for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; + struct hv_page_buffer *cur_pb = &pb[i + 2]; + u64 pfn = page_to_hvpfn(skb_frag_page(frag)); + u32 offset = skb_frag_off(frag); - slots_used += fill_pg_buf(page_to_hvpfn(skb_frag_page(frag)), - skb_frag_off(frag), - skb_frag_size(frag), - &pb[slots_used]); + cur_pb->offset = offset_in_hvpage(offset); + cur_pb->len = skb_frag_size(frag); + cur_pb->pfn = pfn + (offset >> HV_HYP_PAGE_SHIFT); } - return slots_used; + return frags + 2; } static int count_skb_frag_slots(struct sk_buff *skb) @@ -483,7 +449,7 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) struct net_device *vf_netdev; u32 rndis_msg_size; u32 hash; - struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + struct hv_page_buffer pb[MAX_DATA_RANGES]; /* If VF is present and up then redirect packets to it. * Skip the VF if it is marked down or has no carrier. 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 82747dfacd70f0..9e73959e61ee0b 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -225,8 +225,7 @@ static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { struct hv_netvsc_packet *packet; - struct hv_page_buffer page_buf[2]; - struct hv_page_buffer *pb = page_buf; + struct hv_page_buffer pb; int ret; /* Setup the packet to send it */ @@ -235,27 +234,14 @@ static int rndis_filter_send_request(struct rndis_device *dev, packet->total_data_buflen = req->request_msg.msg_len; packet->page_buf_cnt = 1; - pb[0].pfn = virt_to_phys(&req->request_msg) >> - HV_HYP_PAGE_SHIFT; - pb[0].len = req->request_msg.msg_len; - pb[0].offset = offset_in_hvpage(&req->request_msg); - - /* Add one page_buf when request_msg crossing page boundary */ - if (pb[0].offset + pb[0].len > HV_HYP_PAGE_SIZE) { - packet->page_buf_cnt++; - pb[0].len = HV_HYP_PAGE_SIZE - - pb[0].offset; - pb[1].pfn = virt_to_phys((void *)&req->request_msg - + pb[0].len) >> HV_HYP_PAGE_SHIFT; - pb[1].offset = 0; - pb[1].len = req->request_msg.msg_len - - pb[0].len; - } + pb.pfn = virt_to_phys(&req->request_msg) >> HV_HYP_PAGE_SHIFT; + pb.len = req->request_msg.msg_len; + pb.offset = offset_in_hvpage(&req->request_msg); trace_rndis_send(dev->ndev, 0, &req->request_msg); rcu_read_lock_bh(); - ret = netvsc_send(dev->ndev, packet, NULL, pb, NULL, false); + ret = netvsc_send(dev->ndev, packet, NULL, &pb, NULL, false); rcu_read_unlock_bh(); return ret; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d315e30ee4725..7edbe76b5455a8 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port) return sci; } -static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) +static sci_t macsec_active_sci(struct macsec_secy *secy) { - sci_t sci; + struct macsec_rx_sc 
*rx_sc = rcu_dereference_bh(secy->rx_sc); + + /* Case single RX SC */ + if (rx_sc && !rcu_dereference_bh(rx_sc->next)) + return (rx_sc->active) ? rx_sc->sci : 0; + /* Case no RX SC or multiple */ + else + return 0; +} + +static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, + struct macsec_rxh_data *rxd) +{ + struct macsec_dev *macsec; + sci_t sci = 0; - if (sci_present) + /* SC = 1 */ + if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); - else + /* SC = 0; ES = 0 */ + } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && + (list_is_singular(&rxd->secys))) { + /* Only one SECY should exist on this scenario */ + macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, + secys); + if (macsec) + return macsec_active_sci(&macsec->secy); + } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); + } return sci; } @@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; - sci_t sci; + sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; @@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; - sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); + sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); + if (!sci) + goto drop_nosc; + list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); @@ -1283,6 +1310,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); +drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 
e8d4b01c3f3458..775a386d0aca12 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -257,6 +257,8 @@ static int mctp_usb_open(struct net_device *dev) WRITE_ONCE(mctp_usb->stopped, false); + netif_start_queue(dev); + return mctp_usb_rx_queue(mctp_usb, GFP_KERNEL); } diff --git a/drivers/net/mdio/mdio-mux-meson-gxl.c b/drivers/net/mdio/mdio-mux-meson-gxl.c index 00c66240136b10..3dd12a8c8b03e9 100644 --- a/drivers/net/mdio/mdio-mux-meson-gxl.c +++ b/drivers/net/mdio/mdio-mux-meson-gxl.c @@ -17,6 +17,7 @@ #define REG2_LEDACT GENMASK(23, 22) #define REG2_LEDLINK GENMASK(25, 24) #define REG2_DIV4SEL BIT(27) +#define REG2_REVERSED BIT(28) #define REG2_ADCBYPASS BIT(30) #define REG2_CLKINSEL BIT(31) #define ETH_REG3 0x4 @@ -65,7 +66,7 @@ static void gxl_enable_internal_mdio(struct gxl_mdio_mux *priv) * The only constraint is that it must match the one in * drivers/net/phy/meson-gxl.c to properly match the PHY. */ - writel(FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), + writel(REG2_REVERSED | FIELD_PREP(REG2_PHYID, EPHY_GXL_ID), priv->regs + ETH_REG2); /* Enable the internal phy */ diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41bff..176935a8645ff1 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset) */ static int prepare_extradata(struct netconsole_target *nt) { - u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME; int extradata_len; /* userdata was appended when configfs write helper was called @@ -1260,7 +1259,7 @@ static int prepare_extradata(struct netconsole_target *nt) */ extradata_len = nt->userdata_length; - if (!(nt->sysdata_fields & fields)) + if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index d88bdb9a17176a..47cdee5577d461 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c 
@@ -85,11 +85,11 @@ static int nsim_ipsec_find_empty_idx(struct nsim_ipsec *ipsec) return -ENOSPC; } -static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, +static int nsim_ipsec_parse_proto_keys(struct net_device *dev, + struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { const char aes_gcm_name[] = "rfc4106(gcm(aes))"; - struct net_device *dev = xs->xso.real_dev; unsigned char *key_data; char *alg_name = NULL; int key_len; @@ -129,17 +129,16 @@ static int nsim_ipsec_parse_proto_keys(struct xfrm_state *xs, return 0; } -static int nsim_ipsec_add_sa(struct xfrm_state *xs, +static int nsim_ipsec_add_sa(struct net_device *dev, + struct xfrm_state *xs, struct netlink_ext_ack *extack) { struct nsim_ipsec *ipsec; - struct net_device *dev; struct netdevsim *ns; struct nsim_sa sa; u16 sa_idx; int ret; - dev = xs->xso.real_dev; ns = netdev_priv(dev); ipsec = &ns->ipsec; @@ -174,7 +173,7 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs, sa.crypt = xs->ealg || xs->aead; /* get the key and salt */ - ret = nsim_ipsec_parse_proto_keys(xs, sa.key, &sa.salt); + ret = nsim_ipsec_parse_proto_keys(dev, xs, sa.key, &sa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for SA table"); return ret; @@ -200,9 +199,9 @@ static int nsim_ipsec_add_sa(struct xfrm_state *xs, return 0; } -static void nsim_ipsec_del_sa(struct xfrm_state *xs) +static void nsim_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { - struct netdevsim *ns = netdev_priv(xs->xso.real_dev); + struct netdevsim *ns = netdev_priv(dev); struct nsim_ipsec *ipsec = &ns->ipsec; u16 sa_idx; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 0e0321a7ddd710..31a06e71be25bb 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -369,7 +369,8 @@ static int nsim_poll(struct napi_struct *napi, int budget) int done; done = nsim_rcv(rq, budget); - napi_complete(napi); + if (done < budget) + napi_complete_done(napi, done); return done; } 
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 14f36154963841..e32013eb0186ff 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -730,7 +730,7 @@ static int dp83822_phy_reset(struct phy_device *phydev) return phydev->drv->config_init(phydev); } -#ifdef CONFIG_OF_MDIO +#if IS_ENABLED(CONFIG_OF_MDIO) static const u32 tx_amplitude_100base_tx_gain[] = { 80, 82, 83, 85, 87, 88, 90, 92, 93, 95, 97, 98, 100, 102, 103, 105, diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index ede596c1a69d1b..909b4d53fdacdc 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -903,6 +903,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read) retval = bus->read(bus, addr, regnum); else @@ -932,6 +935,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write) err = bus->write(bus, addr, regnum, val); else @@ -993,6 +999,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read_c45) retval = bus->read_c45(bus, addr, devad, regnum); else @@ -1024,6 +1033,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write_c45) err = bus->write_c45(bus, addr, devad, regnum, val); else diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig index 2a8ac5aed0f893..6a4c2b328c4183 100644 --- a/drivers/net/phy/mediatek/Kconfig +++ b/drivers/net/phy/mediatek/Kconfig @@ -15,8 +15,7 @@ config MEDIATEK_GE_PHY config MEDIATEK_GE_SOC_PHY tristate "MediaTek SoC Ethernet PHYs" - depends on (ARM64 && ARCH_MEDIATEK) 
|| COMPILE_TEST - depends on NVMEM_MTK_EFUSE + depends on (ARM64 && ARCH_MEDIATEK && NVMEM_MTK_EFUSE) || COMPILE_TEST select MTK_NET_PHYLIB help Supports MediaTek SoC built-in Gigabit Ethernet PHYs. diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 24882d30f68589..e2c6569d8c45ca 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2027,12 +2027,6 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } - /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes - * in this switch shall be regarded as broken. - */ - if (phydev->dev_flags & MICREL_NO_EEE) - phy_disable_eee(phydev); - return kszphy_config_init(phydev); } @@ -5705,7 +5699,6 @@ static struct phy_driver ksphy_driver[] = { .handle_interrupt = kszphy_handle_interrupt, .suspend = genphy_suspend, .resume = ksz9477_resume, - .get_features = ksz9477_get_features, } }; module_phy_driver(ksphy_driver); diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 0e17cc458efdc7..93de88c1c8fd58 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -37,47 +37,6 @@ static int lan88xx_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, page); } -static int lan88xx_phy_config_intr(struct phy_device *phydev) -{ - int rc; - - if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - /* unmask all source and clear them before enable */ - rc = phy_write(phydev, LAN88XX_INT_MASK, 0x7FFF); - rc = phy_read(phydev, LAN88XX_INT_STS); - rc = phy_write(phydev, LAN88XX_INT_MASK, - LAN88XX_INT_MASK_MDINTPIN_EN_ | - LAN88XX_INT_MASK_LINK_CHANGE_); - } else { - rc = phy_write(phydev, LAN88XX_INT_MASK, 0); - if (rc) - return rc; - - /* Ack interrupts after they have been disabled */ - rc = phy_read(phydev, LAN88XX_INT_STS); - } - - return rc < 0 ? 
rc : 0; -} - -static irqreturn_t lan88xx_handle_interrupt(struct phy_device *phydev) -{ - int irq_status; - - irq_status = phy_read(phydev, LAN88XX_INT_STS); - if (irq_status < 0) { - phy_error(phydev); - return IRQ_NONE; - } - - if (!(irq_status & LAN88XX_INT_STS_LINK_CHANGE_)) - return IRQ_NONE; - - phy_trigger_machine(phydev); - - return IRQ_HANDLED; -} - static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; @@ -528,8 +487,9 @@ static struct phy_driver microchip_phy_driver[] = { .config_aneg = lan88xx_config_aneg, .link_change_notify = lan88xx_link_change_notify, - .config_intr = lan88xx_phy_config_intr, - .handle_interrupt = lan88xx_handle_interrupt, + /* Interrupt handling is broken, do not define related + * functions to force polling. + */ .suspend = lan88xx_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index ed8fb14a7f215e..6b800081eed52f 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -946,7 +946,9 @@ static int vsc85xx_ip1_conf(struct phy_device *phydev, enum ts_blk blk, /* UDP checksum offset in IPv4 packet * according to: https://tools.ietf.org/html/rfc768 */ - val |= IP1_NXT_PROT_UDP_CHKSUM_OFF(26) | IP1_NXT_PROT_UDP_CHKSUM_CLEAR; + val |= IP1_NXT_PROT_UDP_CHKSUM_OFF(26); + if (enable) + val |= IP1_NXT_PROT_UDP_CHKSUM_CLEAR; vsc85xx_ts_write_csr(phydev, blk, MSCC_ANA_IP1_NXT_PROT_UDP_CHKSUM, val); @@ -1166,18 +1168,24 @@ static void vsc85xx_txtstamp(struct mii_timestamper *mii_ts, container_of(mii_ts, struct vsc8531_private, mii_ts); if (!vsc8531->ptp->configured) - return; + goto out; - if (vsc8531->ptp->tx_type == HWTSTAMP_TX_OFF) { - kfree_skb(skb); - return; - } + if (vsc8531->ptp->tx_type == HWTSTAMP_TX_OFF) + goto out; + + if (vsc8531->ptp->tx_type == HWTSTAMP_TX_ONESTEP_SYNC) + if (ptp_msg_is_sync(skb, type)) + goto out; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; mutex_lock(&vsc8531->ts_lock); 
__skb_queue_tail(&vsc8531->ptp->tx_queue, skb); mutex_unlock(&vsc8531->ts_lock); + return; + +out: + kfree_skb(skb); } static bool vsc85xx_rxtstamp(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index 7033216897264e..38417e2886118c 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) * When @exact is not set, we return either an exact match, or matching capabilities * at lower speed, or the lowest matching speed, or NULL. * + * Non-exact matches will try to return an exact speed and duplex match, but may + * return matching capabilities with same speed but a different duplex. + * * Returns: a matched link_capabilities according to the above process, NULL * otherwise. */ @@ -195,7 +198,7 @@ const struct link_capabilities * phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, bool exact) { - const struct link_capabilities *lcap, *last = NULL; + const struct link_capabilities *lcap, *match = NULL, *last = NULL; for_each_link_caps_desc_speed(lcap) { if (linkmode_intersects(lcap->linkmodes, supported)) { @@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, if (lcap->speed == speed && lcap->duplex == duplex) { return lcap; } else if (!exact) { - if (lcap->speed <= speed) - return lcap; + if (!match && lcap->speed <= speed) + match = lcap; + + if (lcap->speed < speed) + break; } } } - if (!exact) - return last; + if (!match && !exact) + match = last; - return NULL; + return match; } EXPORT_SYMBOL_GPL(phy_caps_lookup); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 675fbd22537879..7d5e76a3db0e94 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -244,6 +244,46 @@ static bool phy_drv_wol_enabled(struct phy_device *phydev) return wol.wolopts != 0; } +static void 
phy_link_change(struct phy_device *phydev, bool up) +{ + struct net_device *netdev = phydev->attached_dev; + + if (up) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); + phydev->adjust_link(netdev); + if (phydev->mii_ts && phydev->mii_ts->link_state) + phydev->mii_ts->link_state(phydev->mii_ts, phydev); +} + +/** + * phy_uses_state_machine - test whether consumer driver uses PAL state machine + * @phydev: the target PHY device structure + * + * Ultimately, this aims to indirectly determine whether the PHY is attached + * to a consumer which uses the state machine by calling phy_start() and + * phy_stop(). + * + * When the PHY driver consumer uses phylib, it must have previously called + * phy_connect_direct() or one of its derivatives, so that phy_prepare_link() + * has set up a hook for monitoring state changes. + * + * When the PHY driver is used by the MAC driver consumer through phylink (the + * only other provider of a phy_link_change() method), using the PHY state + * machine is not optional. + * + * Return: true if consumer calls phy_start() and phy_stop(), false otherwise. + */ +static bool phy_uses_state_machine(struct phy_device *phydev) +{ + if (phydev->phy_link_change == phy_link_change) + return phydev->attached_dev && phydev->adjust_link; + + /* phydev->phy_link_change is implicitly phylink_phy_change() */ + return true; +} + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; @@ -310,7 +350,7 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) * may call phy routines that try to grab the same lock, and that may * lead to a deadlock. 
*/ - if (phydev->attached_dev && phydev->adjust_link) + if (phy_uses_state_machine(phydev)) phy_stop_machine(phydev); if (!mdio_bus_phy_may_suspend(phydev)) @@ -364,7 +404,7 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) } } - if (phydev->attached_dev && phydev->adjust_link) + if (phy_uses_state_machine(phydev)) phy_start_machine(phydev); return 0; @@ -1055,19 +1095,6 @@ struct phy_device *phy_find_first(struct mii_bus *bus) } EXPORT_SYMBOL(phy_find_first); -static void phy_link_change(struct phy_device *phydev, bool up) -{ - struct net_device *netdev = phydev->attached_dev; - - if (up) - netif_carrier_on(netdev); - else - netif_carrier_off(netdev); - phydev->adjust_link(netdev); - if (phydev->mii_ts && phydev->mii_ts->link_state) - phydev->mii_ts->link_state(phydev->mii_ts, phydev); -} - /** * phy_prepare_link - prepares the PHY layer to monitor link status * @phydev: target phy_device struct @@ -1700,8 +1727,10 @@ void phy_detach(struct phy_device *phydev) struct module *ndev_owner = NULL; struct mii_bus *bus; - if (phydev->devlink) + if (phydev->devlink) { device_link_del(phydev->devlink); + phydev->devlink = NULL; + } if (phydev->sysfs_links) { if (dev) diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c index bd3c9554f6acb5..60893691d4c357 100644 --- a/drivers/net/phy/phy_led_triggers.c +++ b/drivers/net/phy/phy_led_triggers.c @@ -93,9 +93,8 @@ int phy_led_triggers_register(struct phy_device *phy) if (!phy->phy_num_led_triggers) return 0; - phy->led_link_trigger = devm_kzalloc(&phy->mdio.dev, - sizeof(*phy->led_link_trigger), - GFP_KERNEL); + phy->led_link_trigger = kzalloc(sizeof(*phy->led_link_trigger), + GFP_KERNEL); if (!phy->led_link_trigger) { err = -ENOMEM; goto out_clear; @@ -105,10 +104,9 @@ int phy_led_triggers_register(struct phy_device *phy) if (err) goto out_free_link; - phy->phy_led_triggers = devm_kcalloc(&phy->mdio.dev, - phy->phy_num_led_triggers, - sizeof(struct phy_led_trigger), - 
GFP_KERNEL); + phy->phy_led_triggers = kcalloc(phy->phy_num_led_triggers, + sizeof(struct phy_led_trigger), + GFP_KERNEL); if (!phy->phy_led_triggers) { err = -ENOMEM; goto out_unreg_link; @@ -129,11 +127,11 @@ int phy_led_triggers_register(struct phy_device *phy) out_unreg: while (i--) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); - devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); + kfree(phy->phy_led_triggers); out_unreg_link: phy_led_trigger_unregister(phy->led_link_trigger); out_free_link: - devm_kfree(&phy->mdio.dev, phy->led_link_trigger); + kfree(phy->led_link_trigger); phy->led_link_trigger = NULL; out_clear: phy->phy_num_led_triggers = 0; @@ -147,8 +145,13 @@ void phy_led_triggers_unregister(struct phy_device *phy) for (i = 0; i < phy->phy_num_led_triggers; i++) phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + kfree(phy->phy_led_triggers); + phy->phy_led_triggers = NULL; - if (phy->led_link_trigger) + if (phy->led_link_trigger) { phy_led_trigger_unregister(phy->led_link_trigger); + kfree(phy->led_link_trigger); + phy->led_link_trigger = NULL; + } } EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b68369e2342b27..1bdd5d8bb5b021 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -81,6 +81,7 @@ struct phylink { unsigned int pcs_state; bool link_failed; + bool suspend_link_up; bool major_config_failed; bool mac_supports_eee_ops; bool mac_supports_eee; @@ -2545,14 +2546,16 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) /* Stop the resolver bringing the link up */ __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); - /* Disable the carrier, to prevent transmit timeouts, - * but one would hope all packets have been sent. This - * also means phylink_resolve() will do nothing. 
- */ - if (pl->netdev) - netif_carrier_off(pl->netdev); - else + pl->suspend_link_up = phylink_link_is_up(pl); + if (pl->suspend_link_up) { + /* Disable the carrier, to prevent transmit timeouts, + * but one would hope all packets have been sent. This + * also means phylink_resolve() will do nothing. + */ + if (pl->netdev) + netif_carrier_off(pl->netdev); pl->old_link_state = false; + } /* We do not call mac_link_down() here as we want the * link to remain up to receive the WoL packets. @@ -2603,15 +2606,18 @@ void phylink_resume(struct phylink *pl) if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { /* Wake-on-Lan enabled, MAC handling */ - /* Call mac_link_down() so we keep the overall state balanced. - * Do this under the state_mutex lock for consistency. This - * will cause a "Link Down" message to be printed during - * resume, which is harmless - the true link state will be - * printed when we run a resolve. - */ - mutex_lock(&pl->state_mutex); - phylink_link_down(pl); - mutex_unlock(&pl->state_mutex); + if (pl->suspend_link_up) { + /* Call mac_link_down() so we keep the overall state + * balanced. Do this under the state_mutex lock for + * consistency. This will cause a "Link Down" message + * to be printed during resume, which is harmless - + * the true link state will be printed when we run a + * resolve. + */ + mutex_lock(&pl->state_mutex); + phylink_link_down(pl); + mutex_unlock(&pl->state_mutex); + } /* Re-apply the link parameters so that all the settings get * restored to the MAC. 
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 644e99fc3623f5..9c4932198931f3 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -506,6 +506,11 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb) unsigned char *data; int islcp; + /* Ensure we can safely access protocol field and LCP code */ + if (!pskb_may_pull(skb, 3)) { + kfree_skb(skb); + return NULL; + } data = skb->data; proto = get_unaligned_be16(data); diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index d8fc0c79745d33..b75ceb90359f89 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1778,8 +1778,8 @@ static void team_change_rx_flags(struct net_device *dev, int change) struct team_port *port; int inc; - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) { + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) { if (change & IFF_PROMISC) { inc = dev->flags & IFF_PROMISC ? 1 : -1; dev_set_promiscuity(port->dev, inc); @@ -1789,7 +1789,7 @@ static void team_change_rx_flags(struct net_device *dev, int change) dev_set_allmulti(port->dev, inc); } } - rcu_read_unlock(); + mutex_unlock(&team->lock); } static void team_set_rx_mode(struct net_device *dev) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index ff5be2cbf17b90..9201ee10a13f78 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -30,11 +30,14 @@ static int aqc111_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value, ret = usbnet_read_cmd_nopm(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + ret = ret < 0 ? 
ret : -ENODATA; + } + return ret; } @@ -46,11 +49,14 @@ static int aqc111_read_cmd(struct usbnet *dev, u8 cmd, u16 value, ret = usbnet_read_cmd(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); - if (unlikely(ret < 0)) + if (unlikely(ret < size)) { netdev_warn(dev->net, "Failed to read(0x%x) reg index 0x%04x: %d\n", cmd, index, ret); + ret = ret < 0 ? ret : -ENODATA; + } + return ret; } diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bb0bf141587274..7b3739b29c8f72 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,16 +630,6 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; -static const struct driver_info wwan_rndis_info = { - .description = "Mobile Broadband RNDIS device", - .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, - .bind = rndis_bind, - .unbind = rndis_unbind, - .status = rndis_status, - .rx_fixup = rndis_rx_fixup, - .tx_fixup = rndis_tx_fixup, -}; - /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -676,11 +666,9 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and - * Telit FN990A (RNDIS) - */ + /* Novatel Verizon USB730L */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long)&wwan_rndis_info, + .driver_info = (unsigned long) &rndis_info, }, { }, // END }; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e4617216a4bd5..e53ba600605a5d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3342,7 +3342,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) 
+static void __virtnet_rx_pause(struct virtnet_info *vi, + struct receive_queue *rq) { bool running = netif_running(vi->dev); @@ -3352,15 +3353,64 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) } } -static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +static void virtnet_rx_pause_all(struct virtnet_info *vi) { - bool running = netif_running(vi->dev); + int i; - if (!try_fill_recv(vi, rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + for (i = 0; i < vi->max_queue_pairs; i++) + __virtnet_rx_pause(vi, &vi->rq[i]); +} +static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +{ + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + __virtnet_rx_pause(vi, rq); +} + +static void __virtnet_rx_resume(struct virtnet_info *vi, + struct receive_queue *rq, + bool refill) +{ + bool running = netif_running(vi->dev); + bool schedule_refill = false; + + if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) + schedule_refill = true; if (running) virtnet_napi_enable(rq); + + if (schedule_refill) + schedule_delayed_work(&vi->refill, 0); +} + +static void virtnet_rx_resume_all(struct virtnet_info *vi) +{ + int i; + + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + __virtnet_rx_resume(vi, &vi->rq[i], true); + else + __virtnet_rx_resume(vi, &vi->rq[i], false); + } +} + +static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +{ + enable_delayed_refill(vi); + __virtnet_rx_resume(vi, rq, true); } static int virtnet_rx_resize(struct virtnet_info *vi, @@ -3681,8 +3731,10 @@ static int 
virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) succ: vi->curr_queue_pairs = queue_pairs; /* virtnet_open() will refill when device is going to up. */ - if (dev->flags & IFF_UP) + spin_lock_bh(&vi->refill_lock); + if (dev->flags & IFF_UP && vi->refill_enabled) schedule_delayed_work(&vi->refill, 0); + spin_unlock_bh(&vi->refill_lock); return 0; } @@ -5626,6 +5678,10 @@ static void virtnet_get_base_stats(struct net_device *dev, if (vi->device_stats_cap & VIRTIO_NET_STATS_TYPE_TX_SPEED) tx->hw_drop_ratelimits = 0; + + netdev_stat_queue_sum(dev, + dev->real_num_rx_queues, vi->max_queue_pairs, rx, + dev->real_num_tx_queues, vi->max_queue_pairs, tx); } static const struct netdev_stat_ops virtnet_stat_ops = { @@ -5838,8 +5894,10 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len, DMA_TO_DEVICE, 0); - if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) - return -ENOMEM; + if (virtqueue_dma_mapping_error(sq->vq, hdr_dma)) { + err = -ENOMEM; + goto err_free_buffs; + } err = xsk_pool_dma_map(pool, dma_dev, 0); if (err) @@ -5867,6 +5925,8 @@ static int virtnet_xsk_pool_enable(struct net_device *dev, err_xsk_map: virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len, DMA_TO_DEVICE, 0); +err_free_buffs: + kvfree(rq->xsk_buffs); return err; } @@ -5959,12 +6019,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); + virtnet_rx_pause_all(vi); + /* Make sure NAPI is not using any XDP TX queues for RX. 
*/ if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_disable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_disable(&vi->sq[i]); - } } if (!prog) { @@ -5996,13 +6056,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_enabled = false; } + virtnet_rx_resume_all(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) { - virtnet_napi_enable(&vi->rq[i]); + if (netif_running(dev)) virtnet_napi_tx_enable(&vi->sq[i]); - } } return 0; @@ -6014,11 +6073,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } + virtnet_rx_resume_all(vi); if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_enable(&vi->sq[i]); - } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3df6aabc7e339e..287b7c20c0d6c6 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1568,6 +1568,30 @@ vmxnet3_get_hdr_len(struct vmxnet3_adapter *adapter, struct sk_buff *skb, return (hlen + (hdr.tcp->doff << 2)); } +static void +vmxnet3_lro_tunnel(struct sk_buff *skb, __be16 ip_proto) +{ + struct udphdr *uh = NULL; + + if (ip_proto == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + + if (iph->nexthdr == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } + if (uh) { + if (uh->check) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } +} + static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter 
*adapter, int quota) @@ -1881,6 +1905,8 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (segCnt != 0 && mss != 0) { skb_shinfo(skb)->gso_type = rcd->v4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6; + if (encap_lro) + vmxnet3_lro_tunnel(skb, skb->protocol); skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { @@ -3607,8 +3633,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) struct vmxnet3_adapter *adapter = netdev_priv(netdev); int err = 0; - WRITE_ONCE(netdev->mtu, new_mtu); - /* * Reset_work may be in the middle of resetting the device, wait for its * completion. @@ -3622,6 +3646,7 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) /* we need to re-create the rx queue based on the new mtu */ vmxnet3_rq_destroy_all(adapter); + WRITE_ONCE(netdev->mtu, new_mtu); vmxnet3_adjust_rx_ring_size(adapter); err = vmxnet3_rq_create_all(adapter); if (err) { @@ -3638,6 +3663,8 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) "Closing it\n", err); goto out; } + } else { + WRITE_ONCE(netdev->mtu, new_mtu); } out: diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 616ecc38d1726c..5f470499e60024 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -397,7 +397,7 @@ vmxnet3_process_xdp(struct vmxnet3_adapter *adapter, xdp_init_buff(&xdp, PAGE_SIZE, &rq->xdp_rxq); xdp_prepare_buff(&xdp, page_address(page), rq->page_pool->p.offset, - rbi->len, false); + rcd->len, false); xdp_buff_clear_frags_flag(&xdp); xdp_prog = rcu_dereference(rq->adapter->xdp_bpf_prog); diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c index 6e6e9f05509ab0..06d19e90eadb59 100644 --- a/drivers/net/vxlan/vxlan_vnifilter.c +++ b/drivers/net/vxlan/vxlan_vnifilter.c @@ -627,7 +627,11 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, * default dst remote_ip previously added for this vni 
*/ if (!vxlan_addr_any(&vninode->remote_ip) || - !vxlan_addr_any(&dst->remote_ip)) + !vxlan_addr_any(&dst->remote_ip)) { + u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, + vninode->vni); + + spin_lock_bh(&vxlan->hash_lock[hash_index]); __vxlan_fdb_delete(vxlan, all_zeros_mac, (vxlan_addr_any(&vninode->remote_ip) ? dst->remote_ip : vninode->remote_ip), @@ -635,6 +639,8 @@ static void vxlan_vni_delete_group(struct vxlan_dev *vxlan, vninode->vni, vninode->vni, dst->remote_ifindex, true); + spin_unlock_bh(&vxlan->hash_lock[hash_index]); + } if (vxlan->dev->flags & IFF_UP) { if (vxlan_addr_multicast(&vninode->remote_ip) && diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 3ffeeba5dccf40..4a529f1f9beab6 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -366,6 +366,7 @@ static int wg_newlink(struct net_device *dev, if (ret < 0) goto err_free_handshake_queue; + dev_set_threaded(dev, true); ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 866bad2db33487..65673b1aba55d2 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -937,7 +937,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) dev_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); - ath10k_snoc_irq_enable(ar); + /* IRQs are left enabled when we restart due to a firmware crash */ + if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) + ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags); diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 3d39ff85ba94ad..22eb1b0377ffed 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -951,6 +951,7 @@ void ath11k_fw_stats_init(struct ath11k *ar) 
INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath11k_fw_stats_free(struct ath11k_fw_stats *stats) @@ -1946,6 +1947,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) { int ret; + switch (ath11k_crypto_mode) { + case ATH11K_CRYPT_MODE_SW: + set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + case ATH11K_CRYPT_MODE_HW: + clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + default: + ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); + return -EINVAL; + } + ret = ath11k_core_start_firmware(ab, ab->fw_mode); if (ret) { ath11k_err(ab, "failed to start firmware: %d\n", ret); @@ -1964,20 +1979,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) goto err_firmware_stop; } - switch (ath11k_crypto_mode) { - case ATH11K_CRYPT_MODE_SW: - set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - case ATH11K_CRYPT_MODE_HW: - clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - default: - ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); - return -EINVAL; - } - if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW) set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); @@ -2050,6 +2051,7 @@ static int ath11k_core_reconfigure_on_crash(struct ath11k_base *ab) void ath11k_core_halt(struct ath11k *ar) { struct ath11k_base *ab = ar->ab; + struct list_head *pos, *n; lockdep_assert_held(&ar->conf_mutex); @@ -2065,7 +2067,12 @@ void ath11k_core_halt(struct ath11k *ar) rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + 
idr_init(&ar->txmgmt_idr); } diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 1a3d0de4afde83..529aca4f40621e 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -599,6 +599,8 @@ struct ath11k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath11k_dbg_htt_stats { @@ -783,7 +785,7 @@ struct ath11k { u8 alpha2[REG_ALPHA2_LEN + 1]; struct ath11k_fw_stats fw_stats; struct completion fw_stats_complete; - bool fw_stats_done; + struct completion fw_stats_done; /* protected by conf_mutex */ bool ps_state_enable; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index bf192529e3fe26..5d46f8e4c231fb 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -93,57 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, spin_unlock_bh(&dbr_data->lock); } -static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - spin_unlock_bh(&ar->data_lock); -} - void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats) { struct ath11k_base *ab = ar->ab; - struct ath11k_pdev *pdev; - bool is_end; - static unsigned int num_vdev, num_bcn; - size_t total_vdevs_started = 0; - int i; - - /* WMI_REQUEST_PDEV_STAT request has been already processed */ - - if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - ar->fw_stats_done = true; - return; - } - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath11k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += ar->num_started_vdevs; - } - - is_end = ((++num_vdev) == total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) { - ar->fw_stats_done = true; - num_vdev = 0; - } - return; - } + bool is_end = true; + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and + * WMI_REQUEST_VDEV_STAT requests have been already processed. 
+ */ if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath11k_warn(ab, "empty bcn stats"); @@ -152,97 +109,18 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { - ar->fw_stats_done = true; - num_bcn = 0; - } + if (is_end) + complete(&ar->fw_stats_done); } } -static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long timeout, time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + secs_to_jiffies(3); - - ath11k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - - if (!time_left) - return -ETIMEDOUT; - - for (;;) { - if (time_after(jiffies, timeout)) - break; - - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } - return 0; -} - -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) { - ret 
= -ENETDOWN; - goto err_unlock; - } - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - -err_unlock: - mutex_unlock(&ar->conf_mutex); - - return ret; -} - static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) { struct ath11k *ar = inode->i_private; @@ -268,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_free; @@ -339,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); goto err_free; @@ -415,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file) continue; req_param.vdev_id = arvif->vdev_id; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); goto err_free; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index a39e458637b013..ed7fec177588f6 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. 
All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ATH11K_DEBUGFS_H_ @@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar); void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats); void ath11k_debugfs_fw_stats_init(struct ath11k *ar); -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id); static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar) { @@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return 0; } -static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, - u32 pdev_id, u32 vdev_id, u32 stats_id) -{ - return 0; -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 97816916abac96..4763b271309aa2 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -8991,6 +8991,86 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not 
request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + mutex_lock(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) { + ret = -ENETDOWN; + goto err_unlock; + } + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + +err_unlock: + mutex_unlock(&ar->conf_mutex); + + return ret; +} + static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9028,8 +9108,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { + !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true); } @@ -9037,8 +9117,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!signal && 
arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, @@ -9384,11 +9464,13 @@ static int ath11k_fw_stats_request(struct ath11k *ar, lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); if (ret) { diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index f5800fbecff89e..5e61eea1bb0378 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef ATH11K_MAC_H @@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif, void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index d7f852bebf4aa2..98811726d33bf1 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb) { struct ath11k_fw_stats stats = {}; + size_t total_vdevs_started = 0; + struct ath11k_pdev *pdev; + bool is_end = true; + int i; + struct ath11k *ar; int ret; @@ -8184,18 +8189,50 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and + * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via * debugfs fw stats. Therefore, processing it separately. 
*/ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats.vdevs)) { + ath11k_warn(ab, "empty vdev stats"); + goto complete; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += ar->num_started_vdevs; + } + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats.vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + goto complete; } - /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT - * are currently requested only via debugfs fw stats. Hence, processing these - * in debugfs context + /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats. 
+ * Hence, processing it in debugfs context */ ath11k_debugfs_fw_stats_process(ar, &stats); diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 0b2dec081c6ee8..261f52b327e89c 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -891,6 +891,9 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) ab = ag->ab[i]; if (!ab) continue; + + clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ath12k_core_device_cleanup(ab); } @@ -1026,6 +1029,8 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) mutex_lock(&ab->core_lock); + set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ret = ath12k_core_pdev_create(ab); if (ret) { ath12k_err(ab, "failed to create pdev core %d\n", ret); @@ -1246,6 +1251,7 @@ static void ath12k_rfkill_work(struct work_struct *work) void ath12k_core_halt(struct ath12k *ar) { + struct list_head *pos, *n; struct ath12k_base *ab = ar->ab; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -1261,7 +1267,12 @@ void ath12k_core_halt(struct ath12k *ar) rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], NULL); synchronize_rcu(); - INIT_LIST_HEAD(&ar->arvifs); + + spin_lock_bh(&ar->data_lock); + list_for_each_safe(pos, n, &ar->arvifs) + list_del_init(pos); + spin_unlock_bh(&ar->data_lock); + idr_init(&ar->txmgmt_idr); } @@ -1774,7 +1785,7 @@ static void ath12k_core_hw_group_destroy(struct ath12k_hw_group *ag) } } -static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) +void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) { struct ath12k_base *ab; int i; @@ -1891,7 +1902,8 @@ int ath12k_core_init(struct ath12k_base *ab) if (!ag) { mutex_unlock(&ath12k_hw_group_mutex); ath12k_warn(ab, "unable to get hw group\n"); - return -ENODEV; + ret = -ENODEV; + goto err_unregister_notifier; } mutex_unlock(&ath12k_hw_group_mutex); @@ -1906,7 +1918,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { 
mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err; + goto err_destroy_hw_group; } } @@ -1914,18 +1926,20 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err: +err_destroy_hw_group: ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); +err_unregister_notifier: + ath12k_core_panic_notifier_unregister(ab); + return ret; } void ath12k_core_deinit(struct ath12k_base *ab) { - ath12k_core_panic_notifier_unregister(ab); - ath12k_core_hw_group_cleanup(ab->ag); ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); + ath12k_core_panic_notifier_unregister(ab); } void ath12k_core_free(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 3fac4f00d3832c..f5f1ec796f7c55 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -533,11 +533,21 @@ struct ath12k_sta { enum ieee80211_sta_state state; }; -#define ATH12K_MIN_5G_FREQ 4150 -#define ATH12K_MIN_6G_FREQ 5925 -#define ATH12K_MAX_6G_FREQ 7115 +#define ATH12K_HALF_20MHZ_BW 10 +#define ATH12K_2GHZ_MIN_CENTER 2412 +#define ATH12K_2GHZ_MAX_CENTER 2484 +#define ATH12K_5GHZ_MIN_CENTER 4900 +#define ATH12K_5GHZ_MAX_CENTER 5920 +#define ATH12K_6GHZ_MIN_CENTER 5935 +#define ATH12K_6GHZ_MAX_CENTER 7115 +#define ATH12K_MIN_2GHZ_FREQ (ATH12K_2GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW - 1) +#define ATH12K_MAX_2GHZ_FREQ (ATH12K_2GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW + 1) +#define ATH12K_MIN_5GHZ_FREQ (ATH12K_5GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_5GHZ_FREQ (ATH12K_5GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW) +#define ATH12K_MIN_6GHZ_FREQ (ATH12K_6GHZ_MIN_CENTER - ATH12K_HALF_20MHZ_BW) +#define ATH12K_MAX_6GHZ_FREQ (ATH12K_6GHZ_MAX_CENTER + ATH12K_HALF_20MHZ_BW) #define ATH12K_NUM_CHANS 101 -#define ATH12K_MAX_5G_CHAN 173 +#define ATH12K_MAX_5GHZ_CHAN 173 enum ath12k_hw_state { ATH12K_HW_STATE_OFF, @@ -1185,6 +1195,7 @@ struct 
ath12k_fw_stats_pdev { }; int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab); +void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag); int ath12k_core_pre_init(struct ath12k_base *ab); int ath12k_core_init(struct ath12k_base *ath12k); void ath12k_core_deinit(struct ath12k_base *ath12k); diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index 57002215ddf168..5efe30cf77470a 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -88,8 +88,8 @@ static int ath12k_get_tpc_ctl_mode_idx(struct wmi_tpc_stats_arg *tpc_stats, u32 chan_freq = le32_to_cpu(tpc_stats->tpc_config.chan_freq); u8 band; - band = ((chan_freq > ATH12K_MIN_6G_FREQ) ? NL80211_BAND_6GHZ : - ((chan_freq > ATH12K_MIN_5G_FREQ) ? NL80211_BAND_5GHZ : + band = ((chan_freq > ATH12K_MIN_6GHZ_FREQ) ? NL80211_BAND_6GHZ : + ((chan_freq > ATH12K_MIN_5GHZ_FREQ) ? NL80211_BAND_5GHZ : NL80211_BAND_2GHZ)); if (band == NL80211_BAND_5GHZ || band == NL80211_BAND_6GHZ) { diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 1c0d5fa39a8dcb..aeaf970339d4dc 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -5377,6 +5377,9 @@ static ssize_t ath12k_write_htt_stats_type(struct file *file, const int size = 32; int num_args; + if (count > size) + return -EINVAL; + char *buf __free(kfree) = kzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 75435a931548c9..427a87b63dec3b 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -106,6 +106,8 @@ struct dp_mon_mpdu { struct list_head list; struct sk_buff *head; struct sk_buff *tail; + u32 err_bitmap; + u8 decap_format; }; #define DP_MON_MAX_STATUS_BUF 32 diff --git 
a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index d22800e894850d..600d97169f241a 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1647,7 +1647,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k *ar, u32_get_bits(info[0], HAL_RX_MPDU_START_INFO0_PPDU_ID); } - break; + return HAL_RX_MON_STATUS_MPDU_START; } case HAL_RX_MSDU_START: /* TODO: add msdu start parsing logic */ @@ -1700,33 +1700,159 @@ static void ath12k_dp_mon_rx_msdus_set_payload(struct ath12k *ar, skb_pull(head_msdu, rx_pkt_offset + l2_hdr_offset); } +static void +ath12k_dp_mon_fill_rx_stats_info(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + u32 center_freq = ppdu_info->freq; + + rx_status->freq = center_freq; + rx_status->bw = ath12k_mac_bw_to_mac80211_bw(ppdu_info->bw); + rx_status->nss = ppdu_info->nss; + rx_status->rate_idx = 0; + rx_status->encoding = RX_ENC_LEGACY; + rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; + + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { + rx_status->band = NL80211_BAND_6GHZ; + } else if (center_freq >= ATH12K_MIN_2GHZ_FREQ && + center_freq <= ATH12K_MAX_2GHZ_FREQ) { + rx_status->band = NL80211_BAND_2GHZ; + } else if (center_freq >= ATH12K_MIN_5GHZ_FREQ && + center_freq <= ATH12K_MAX_5GHZ_FREQ) { + rx_status->band = NL80211_BAND_5GHZ; + } else { + rx_status->band = NUM_NL80211_BANDS; + } +} + +static void +ath12k_dp_mon_fill_rx_rate(struct ath12k *ar, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_supported_band *sband; + enum rx_msdu_start_pkt_type pkt_type; + u8 rate_mcs, nss, sgi; + bool is_cck; + + pkt_type = ppdu_info->preamble_type; + rate_mcs = ppdu_info->rate; + nss = ppdu_info->nss; + sgi = ppdu_info->gi; + + switch (pkt_type) { + case RX_MSDU_START_PKT_TYPE_11A: + case RX_MSDU_START_PKT_TYPE_11B: + is_cck = (pkt_type 
== RX_MSDU_START_PKT_TYPE_11B); + if (rx_status->band < NUM_NL80211_BANDS) { + sband = &ar->mac.sbands[rx_status->band]; + rx_status->rate_idx = ath12k_mac_hw_rate_to_idx(sband, rate_mcs, + is_cck); + } + break; + case RX_MSDU_START_PKT_TYPE_11N: + rx_status->encoding = RX_ENC_HT; + if (rate_mcs > ATH12K_HT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HT mode %d\n", + rate_mcs); + break; + } + rx_status->rate_idx = rate_mcs + (8 * (nss - 1)); + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AC: + rx_status->encoding = RX_ENC_VHT; + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_VHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in VHT mode %d\n", + rate_mcs); + break; + } + if (sgi) + rx_status->enc_flags |= RX_ENC_FLAG_SHORT_GI; + break; + case RX_MSDU_START_PKT_TYPE_11AX: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_HE_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in HE mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_HE; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + case RX_MSDU_START_PKT_TYPE_11BE: + rx_status->rate_idx = rate_mcs; + if (rate_mcs > ATH12K_EHT_MCS_MAX) { + ath12k_warn(ar->ab, + "Received with invalid mcs in EHT mode %d\n", + rate_mcs); + break; + } + rx_status->encoding = RX_ENC_EHT; + rx_status->he_gi = ath12k_he_gi_to_nl80211_he_gi(sgi); + break; + default: + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "monitor receives invalid preamble type %d", + pkt_type); + break; + } +} + static struct sk_buff * ath12k_dp_mon_rx_merg_msdus(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, - struct ieee80211_rx_status *rxs, bool *fcs_err) + struct dp_mon_mpdu *mon_mpdu, + struct hal_rx_mon_ppdu_info *ppdu_info, + struct ieee80211_rx_status *rxs) { struct ath12k_base *ab = ar->ab; struct sk_buff *msdu, *mpdu_buf, *prev_buf, *head_frag_list; - struct hal_rx_desc *rx_desc, 
*tail_rx_desc; - u8 *hdr_desc, *dest, decap_format; + struct sk_buff *head_msdu, *tail_msdu; + struct hal_rx_desc *rx_desc; + u8 *hdr_desc, *dest, decap_format = mon_mpdu->decap_format; struct ieee80211_hdr_3addr *wh; - u32 err_bitmap, frag_list_sum_len = 0; + struct ieee80211_channel *channel; + u32 frag_list_sum_len = 0; + u8 channel_num = ppdu_info->chan_num; mpdu_buf = NULL; + head_msdu = mon_mpdu->head; + tail_msdu = mon_mpdu->tail; if (!head_msdu) goto err_merge_fail; - rx_desc = (struct hal_rx_desc *)head_msdu->data; - tail_rx_desc = (struct hal_rx_desc *)tail_msdu->data; + ath12k_dp_mon_fill_rx_stats_info(ar, ppdu_info, rxs); - err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, tail_rx_desc); - if (err_bitmap & HAL_RX_MPDU_ERR_FCS) - *fcs_err = true; + if (unlikely(rxs->band == NUM_NL80211_BANDS || + !ath12k_ar_to_hw(ar)->wiphy->bands[rxs->band])) { + ath12k_dbg(ar->ab, ATH12K_DBG_DATA, + "sband is NULL for status band %d channel_num %d center_freq %d pdev_id %d\n", + rxs->band, channel_num, ppdu_info->freq, ar->pdev_idx); - decap_format = ath12k_dp_rx_h_decap_type(ab, tail_rx_desc); + spin_lock_bh(&ar->data_lock); + channel = ar->rx_channel; + if (channel) { + rxs->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } + spin_unlock_bh(&ar->data_lock); + } + + if (rxs->band < NUM_NL80211_BANDS) + rxs->freq = ieee80211_channel_to_frequency(channel_num, + rxs->band); - ath12k_dp_rx_h_ppdu(ar, tail_rx_desc, rxs); + ath12k_dp_mon_fill_rx_rate(ar, ppdu_info, rxs); if (decap_format == DP_RX_DECAP_TYPE_RAW) { ath12k_dp_mon_rx_msdus_set_payload(ar, head_msdu, tail_msdu); @@ -1954,7 +2080,8 @@ static void ath12k_dp_mon_update_radiotap(struct ath12k *ar, static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ieee80211_rx_status *status, + u8 decap) { static const struct ieee80211_radiotap_he known = { .data1 = 
cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN | @@ -1966,7 +2093,7 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct struct ieee80211_sta *pubsta = NULL; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - u8 decap = DP_RX_DECAP_TYPE_RAW; + struct ath12k_dp_rx_info rx_info; bool is_mcbc = rxcb->is_mcbc; bool is_eapol_tkip = rxcb->is_eapol; @@ -1977,10 +2104,9 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct status->flag |= RX_FLAG_RADIOTAP_HE; } - if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ar->ab, rxcb->rx_desc); spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + rx_info.addr2_present = false; + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, &rx_info); if (peer && peer->sta) { pubsta = peer->sta; if (pubsta->valid_links) { @@ -2035,25 +2161,23 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct } static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, - struct sk_buff *head_msdu, struct sk_buff *tail_msdu, + struct dp_mon_mpdu *mon_mpdu, struct hal_rx_mon_ppdu_info *ppduinfo, struct napi_struct *napi) { struct ath12k_pdev_dp *dp = &ar->dp; struct sk_buff *mon_skb, *skb_next, *header; struct ieee80211_rx_status *rxs = &dp->rx_status; - bool fcs_err = false; + u8 decap = DP_RX_DECAP_TYPE_RAW; - mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, - head_msdu, tail_msdu, - rxs, &fcs_err); + mon_skb = ath12k_dp_mon_rx_merg_msdus(ar, mon_mpdu, ppduinfo, rxs); if (!mon_skb) goto mon_deliver_fail; header = mon_skb; rxs->flag = 0; - if (fcs_err) + if (mon_mpdu->err_bitmap & HAL_RX_MPDU_ERR_FCS) rxs->flag = RX_FLAG_FAILED_FCS_CRC; do { @@ -2070,8 +2194,12 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, rxs->flag |= RX_FLAG_ALLOW_SAME_PN; } rxs->flag |= RX_FLAG_ONLY_MONITOR; + + if (!(rxs->flag & RX_FLAG_ONLY_MONITOR)) + decap = mon_mpdu->decap_format; + 
ath12k_dp_mon_update_radiotap(ar, ppduinfo, mon_skb, rxs); - ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs); + ath12k_dp_mon_rx_deliver_msdu(ar, napi, mon_skb, rxs, decap); mon_skb = skb_next; } while (mon_skb); rxs->flag = 0; @@ -2079,7 +2207,7 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, return 0; mon_deliver_fail: - mon_skb = head_msdu; + mon_skb = mon_mpdu->head; while (mon_skb) { skb_next = mon_skb->next; dev_kfree_skb_any(mon_skb); @@ -2088,6 +2216,144 @@ static int ath12k_dp_mon_rx_deliver(struct ath12k *ar, return -EINVAL; } +static int ath12k_dp_pkt_set_pktlen(struct sk_buff *skb, u32 len) +{ + if (skb->len > len) { + skb_trim(skb, len); + } else { + if (skb_tailroom(skb) < len - skb->len) { + if ((pskb_expand_head(skb, 0, + len - skb->len - skb_tailroom(skb), + GFP_ATOMIC))) { + return -ENOMEM; + } + } + skb_put(skb, (len - skb->len)); + } + + return 0; +} + +static void ath12k_dp_mon_parse_rx_msdu_end_err(u32 info, u32 *errmap) +{ + if (info & RX_MSDU_END_INFO13_FCS_ERR) + *errmap |= HAL_RX_MPDU_ERR_FCS; + + if (info & RX_MSDU_END_INFO13_DECRYPT_ERR) + *errmap |= HAL_RX_MPDU_ERR_DECRYPT; + + if (info & RX_MSDU_END_INFO13_TKIP_MIC_ERR) + *errmap |= HAL_RX_MPDU_ERR_TKIP_MIC; + + if (info & RX_MSDU_END_INFO13_A_MSDU_ERROR) + *errmap |= HAL_RX_MPDU_ERR_AMSDU_ERR; + + if (info & RX_MSDU_END_INFO13_OVERFLOW_ERR) + *errmap |= HAL_RX_MPDU_ERR_OVERFLOW; + + if (info & RX_MSDU_END_INFO13_MSDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MSDU_LEN; + + if (info & RX_MSDU_END_INFO13_MPDU_LEN_ERR) + *errmap |= HAL_RX_MPDU_ERR_MPDU_LEN; +} + +static int +ath12k_dp_mon_parse_status_msdu_end(struct ath12k_mon_data *pmon, + const struct hal_rx_msdu_end *msdu_end) +{ + struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; + + ath12k_dp_mon_parse_rx_msdu_end_err(__le32_to_cpu(msdu_end->info2), + &mon_mpdu->err_bitmap); + + mon_mpdu->decap_format = le32_get_bits(msdu_end->info1, + RX_MSDU_END_INFO11_DECAP_FORMAT); + + return 0; +} + +static int 
+ath12k_dp_mon_parse_status_buf(struct ath12k *ar, + struct ath12k_mon_data *pmon, + const struct dp_mon_packet_info *packet_info) +{ + struct ath12k_base *ab = ar->ab; + struct dp_rxdma_mon_ring *buf_ring = &ab->dp.rxdma_mon_buf_ring; + struct sk_buff *msdu; + int buf_id; + u32 offset; + + buf_id = u32_get_bits(packet_info->cookie, DP_RXDMA_BUF_COOKIE_BUF_ID); + + spin_lock_bh(&buf_ring->idr_lock); + msdu = idr_remove(&buf_ring->bufs_idr, buf_id); + spin_unlock_bh(&buf_ring->idr_lock); + + if (unlikely(!msdu)) { + ath12k_warn(ab, "mon dest desc with inval buf_id %d\n", buf_id); + return 0; + } + + dma_unmap_single(ab->dev, ATH12K_SKB_RXCB(msdu)->paddr, + msdu->len + skb_tailroom(msdu), + DMA_FROM_DEVICE); + + offset = packet_info->dma_length + ATH12K_MON_RX_DOT11_OFFSET; + if (ath12k_dp_pkt_set_pktlen(msdu, offset)) { + dev_kfree_skb_any(msdu); + goto dest_replenish; + } + + if (!pmon->mon_mpdu->head) + pmon->mon_mpdu->head = msdu; + else + pmon->mon_mpdu->tail->next = msdu; + + pmon->mon_mpdu->tail = msdu; + +dest_replenish: + ath12k_dp_mon_buf_replenish(ab, buf_ring, 1); + + return 0; +} + +static int +ath12k_dp_mon_parse_rx_dest_tlv(struct ath12k *ar, + struct ath12k_mon_data *pmon, + enum hal_rx_mon_status hal_status, + const void *tlv_data) +{ + switch (hal_status) { + case HAL_RX_MON_STATUS_MPDU_START: + if (WARN_ON_ONCE(pmon->mon_mpdu)) + break; + + pmon->mon_mpdu = kzalloc(sizeof(*pmon->mon_mpdu), GFP_ATOMIC); + if (!pmon->mon_mpdu) + return -ENOMEM; + break; + case HAL_RX_MON_STATUS_BUF_ADDR: + return ath12k_dp_mon_parse_status_buf(ar, pmon, tlv_data); + case HAL_RX_MON_STATUS_MPDU_END: + /* If no MSDU then free empty MPDU */ + if (pmon->mon_mpdu->tail) { + pmon->mon_mpdu->tail->next = NULL; + list_add_tail(&pmon->mon_mpdu->list, &pmon->dp_rx_mon_mpdu_list); + } else { + kfree(pmon->mon_mpdu); + } + pmon->mon_mpdu = NULL; + break; + case HAL_RX_MON_STATUS_MSDU_END: + return ath12k_dp_mon_parse_status_msdu_end(pmon, tlv_data); + default: + break; + } + + 
return 0; +} + static enum hal_rx_mon_status ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, struct sk_buff *skb) @@ -2114,14 +2380,20 @@ ath12k_dp_mon_parse_rx_dest(struct ath12k *ar, struct ath12k_mon_data *pmon, tlv_len = le64_get_bits(tlv->tl, HAL_TLV_64_HDR_LEN); hal_status = ath12k_dp_mon_rx_parse_status_tlv(ar, pmon, tlv); + + if (ar->monitor_started && + ath12k_dp_mon_parse_rx_dest_tlv(ar, pmon, hal_status, tlv->value)) + return HAL_RX_MON_STATUS_PPDU_DONE; + ptr += sizeof(*tlv) + tlv_len; ptr = PTR_ALIGN(ptr, HAL_TLV_64_ALIGN); - if ((ptr - skb->data) >= DP_RX_BUFFER_SIZE) + if ((ptr - skb->data) > skb->len) break; } while ((hal_status == HAL_RX_MON_STATUS_PPDU_NOT_DONE) || (hal_status == HAL_RX_MON_STATUS_BUF_ADDR) || + (hal_status == HAL_RX_MON_STATUS_MPDU_START) || (hal_status == HAL_RX_MON_STATUS_MPDU_END) || (hal_status == HAL_RX_MON_STATUS_MSDU_END)); @@ -2141,23 +2413,21 @@ ath12k_dp_mon_rx_parse_mon_status(struct ath12k *ar, struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; struct dp_mon_mpdu *tmp; struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; - enum hal_rx_mon_status hal_status = HAL_RX_MON_STATUS_BUF_DONE; + enum hal_rx_mon_status hal_status; - ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) + return hal_status; list_for_each_entry_safe(mon_mpdu, tmp, &pmon->dp_rx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu && tail_msdu) { - ath12k_dp_mon_rx_deliver(ar, head_msdu, - tail_msdu, ppdu_info, napi); - } + if (mon_mpdu->head && mon_mpdu->tail) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, ppdu_info, napi); kfree(mon_mpdu); } + return hal_status; } @@ -2838,16 +3108,13 @@ ath12k_dp_mon_tx_process_ppdu_info(struct ath12k *ar, struct dp_mon_tx_ppdu_info *tx_ppdu_info) { struct dp_mon_mpdu *tmp, 
*mon_mpdu; - struct sk_buff *head_msdu, *tail_msdu; list_for_each_entry_safe(mon_mpdu, tmp, &tx_ppdu_info->dp_tx_mon_mpdu_list, list) { list_del(&mon_mpdu->list); - head_msdu = mon_mpdu->head; - tail_msdu = mon_mpdu->tail; - if (head_msdu) - ath12k_dp_mon_rx_deliver(ar, head_msdu, tail_msdu, + if (mon_mpdu->head) + ath12k_dp_mon_rx_deliver(ar, mon_mpdu, &tx_ppdu_info->rx_status, napi); kfree(mon_mpdu); @@ -3346,7 +3613,7 @@ int ath12k_dp_mon_srng_process(struct ath12k *ar, int *budget, ath12k_dp_mon_rx_memset_ppdu_info(ppdu_info); while ((skb = __skb_dequeue(&skb_list))) { - hal_status = ath12k_dp_mon_parse_rx_dest(ar, pmon, skb); + hal_status = ath12k_dp_mon_rx_parse_mon_status(ar, pmon, skb, napi); if (hal_status != HAL_RX_MON_STATUS_PPDU_DONE) { ppdu_info->ppdu_continuation = true; dev_kfree_skb_any(skb); diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.h b/drivers/net/wireless/ath/ath12k/dp_mon.h index e4368eb42aca83..b039f6b9277c69 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.h +++ b/drivers/net/wireless/ath/ath12k/dp_mon.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef ATH12K_DP_MON_H @@ -9,6 +9,8 @@ #include "core.h" +#define ATH12K_MON_RX_DOT11_OFFSET 5 + enum dp_monitor_mode { ATH12K_DP_TX_MONITOR_MODE, ATH12K_DP_RX_MONITOR_MODE diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 75bf4211ad4227..7fadd366ec13de 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -228,12 +228,6 @@ static void ath12k_dp_rx_desc_get_crypto_header(struct ath12k_base *ab, ab->hal_rx_ops->rx_desc_get_crypto_header(desc, crypto_hdr, enctype); } -static u16 ath12k_dp_rxdesc_get_mpdu_frame_ctrl(struct ath12k_base *ab, - struct hal_rx_desc *desc) -{ - return ab->hal_rx_ops->rx_desc_get_mpdu_frame_ctl(desc); -} - static inline u8 ath12k_dp_rx_get_msdu_src_link(struct ath12k_base *ab, struct hal_rx_desc *desc) { @@ -1823,6 +1817,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, struct hal_rx_desc *ldesc; int space_extra, rem_len, buf_len; u32 hal_rx_desc_sz = ar->ab->hal.hal_desc_sz; + bool is_continuation; /* As the msdu is spread across multiple rx buffers, * find the offset to the start of msdu for computing @@ -1871,7 +1866,8 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, rem_len = msdu_len - buf_first_len; while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) { rxcb = ATH12K_SKB_RXCB(skb); - if (rxcb->is_continuation) + is_continuation = rxcb->is_continuation; + if (is_continuation) buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz; else buf_len = rem_len; @@ -1889,7 +1885,7 @@ static int ath12k_dp_rx_msdu_coalesce(struct ath12k *ar, dev_kfree_skb_any(skb); rem_len -= buf_len; - if (!rxcb->is_continuation) + if (!is_continuation) break; } @@ -1914,21 +1910,14 @@ static struct sk_buff *ath12k_dp_rx_get_msdu_last_buf(struct sk_buff_head *msdu_ return NULL; } -static void ath12k_dp_rx_h_csum_offload(struct ath12k *ar, struct sk_buff *msdu) +static void ath12k_dp_rx_h_csum_offload(struct sk_buff *msdu, + struct 
ath12k_dp_rx_info *rx_info) { - struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct ath12k_base *ab = ar->ab; - bool ip_csum_fail, l4_csum_fail; - - ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rxcb->rx_desc); - l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rxcb->rx_desc); - - msdu->ip_summed = (ip_csum_fail || l4_csum_fail) ? - CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + msdu->ip_summed = (rx_info->ip_csum_fail || rx_info->l4_csum_fail) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; } -static int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, - enum hal_encrypt_type enctype) +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype) { switch (enctype) { case HAL_ENCRYPT_TYPE_OPEN: @@ -2122,10 +2111,13 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_base *ab = ar->ab; size_t hdr_len, crypto_len; - struct ieee80211_hdr *hdr; - u16 qos_ctl; - __le16 fc; - u8 *crypto_hdr; + struct ieee80211_hdr hdr; + __le16 qos_ctl; + u8 *crypto_hdr, mesh_ctrl; + + ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, &hdr); + hdr_len = ieee80211_hdrlen(hdr.frame_control); + mesh_ctrl = ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc); if (!(status->flag & RX_FLAG_IV_STRIPPED)) { crypto_len = ath12k_dp_rx_crypto_param_len(ar, enctype); @@ -2133,27 +2125,21 @@ static void ath12k_get_dot11_hdr_from_rx_desc(struct ath12k *ar, ath12k_dp_rx_desc_get_crypto_header(ab, rx_desc, crypto_hdr, enctype); } - fc = cpu_to_le16(ath12k_dp_rxdesc_get_mpdu_frame_ctrl(ab, rx_desc)); - hdr_len = ieee80211_hdrlen(fc); skb_push(msdu, hdr_len); - hdr = (struct ieee80211_hdr *)msdu->data; - hdr->frame_control = fc; - - /* Get wifi header from rx_desc */ - ath12k_dp_rx_desc_get_dot11_hdr(ab, rx_desc, hdr); + memcpy(msdu->data, &hdr, min(hdr_len, sizeof(hdr))); if (rxcb->is_mcbc) status->flag &= ~RX_FLAG_PN_VALIDATED; /* Add QOS header */ - if (ieee80211_is_data_qos(hdr->frame_control)) { - qos_ctl = rxcb->tid; - 
if (ath12k_dp_rx_h_mesh_ctl_present(ab, rx_desc)) - qos_ctl |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT; + if (ieee80211_is_data_qos(hdr.frame_control)) { + struct ieee80211_hdr *qos_ptr = (struct ieee80211_hdr *)msdu->data; - /* TODO: Add other QoS ctl fields when required */ - memcpy(msdu->data + (hdr_len - IEEE80211_QOS_CTL_LEN), - &qos_ctl, IEEE80211_QOS_CTL_LEN); + qos_ctl = cpu_to_le16(rxcb->tid & IEEE80211_QOS_CTL_TID_MASK); + if (mesh_ctrl) + qos_ctl |= cpu_to_le16(IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT); + + memcpy(ieee80211_get_qos_ctl(qos_ptr), &qos_ctl, IEEE80211_QOS_CTL_LEN); } } @@ -2229,10 +2215,10 @@ static void ath12k_dp_rx_h_undecap(struct ath12k *ar, struct sk_buff *msdu, } struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); - struct hal_rx_desc *rx_desc = rxcb->rx_desc; struct ath12k_peer *peer = NULL; lockdep_assert_held(&ab->base_lock); @@ -2243,39 +2229,35 @@ ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu) if (peer) return peer; - if (!rx_desc || !(ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc))) - return NULL; + if (rx_info->addr2_present) + peer = ath12k_peer_find_by_addr(ab, rx_info->addr2); - peer = ath12k_peer_find_by_addr(ab, - ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, - rx_desc)); return peer; } static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, struct sk_buff *msdu, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { - bool fill_crypto_hdr; struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb; enum hal_encrypt_type enctype; bool is_decrypted = false; struct ieee80211_hdr *hdr; struct ath12k_peer *peer; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u32 err_bitmap; /* PN for multicast packets will be checked in mac80211 */ rxcb = 
ATH12K_SKB_RXCB(msdu); - fill_crypto_hdr = ath12k_dp_rx_h_is_da_mcbc(ar->ab, rx_desc); - rxcb->is_mcbc = fill_crypto_hdr; + rxcb->is_mcbc = rx_info->is_mcbc; if (rxcb->is_mcbc) - rxcb->peer_id = ath12k_dp_rx_h_peer_id(ar->ab, rx_desc); + rxcb->peer_id = rx_info->peer_id; spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu, rx_info); if (peer) { if (rxcb->is_mcbc) enctype = peer->sec_type_grp; @@ -2305,7 +2287,7 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, if (is_decrypted) { rx_status->flag |= RX_FLAG_DECRYPTED | RX_FLAG_MMIC_STRIPPED; - if (fill_crypto_hdr) + if (rx_info->is_mcbc) rx_status->flag |= RX_FLAG_MIC_STRIPPED | RX_FLAG_ICV_STRIPPED; else @@ -2313,37 +2295,28 @@ static void ath12k_dp_rx_h_mpdu(struct ath12k *ar, RX_FLAG_PN_VALIDATED; } - ath12k_dp_rx_h_csum_offload(ar, msdu); + ath12k_dp_rx_h_csum_offload(msdu, rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, enctype, rx_status, is_decrypted); - if (!is_decrypted || fill_crypto_hdr) + if (!is_decrypted || rx_info->is_mcbc) return; - if (ath12k_dp_rx_h_decap_type(ar->ab, rx_desc) != - DP_RX_DECAP_TYPE_ETHERNET2_DIX) { + if (rx_info->decap_type != DP_RX_DECAP_TYPE_ETHERNET2_DIX) { hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); } } -static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; struct ieee80211_supported_band *sband; - enum rx_msdu_start_pkt_type pkt_type; - u8 bw; - u8 rate_mcs, nss; - u8 sgi; + struct ieee80211_rx_status *rx_status = rx_info->rx_status; + enum rx_msdu_start_pkt_type pkt_type = rx_info->pkt_type; + u8 bw = rx_info->bw, sgi = rx_info->sgi; + u8 rate_mcs = rx_info->rate_mcs, nss = rx_info->nss; bool is_cck; - pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); - bw = 
ath12k_dp_rx_h_rx_bw(ab, rx_desc); - rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); - nss = ath12k_dp_rx_h_nss(ab, rx_desc); - sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); - switch (pkt_type) { case RX_MSDU_START_PKT_TYPE_11A: case RX_MSDU_START_PKT_TYPE_11B: @@ -2412,10 +2385,35 @@ static void ath12k_dp_rx_h_rate(struct ath12k *ar, struct hal_rx_desc *rx_desc, } } -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status) +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info) { - struct ath12k_base *ab = ar->ab; + rx_info->ip_csum_fail = ath12k_dp_rx_h_ip_cksum_fail(ab, rx_desc); + rx_info->l4_csum_fail = ath12k_dp_rx_h_l4_cksum_fail(ab, rx_desc); + rx_info->is_mcbc = ath12k_dp_rx_h_is_da_mcbc(ab, rx_desc); + rx_info->decap_type = ath12k_dp_rx_h_decap_type(ab, rx_desc); + rx_info->pkt_type = ath12k_dp_rx_h_pkt_type(ab, rx_desc); + rx_info->sgi = ath12k_dp_rx_h_sgi(ab, rx_desc); + rx_info->rate_mcs = ath12k_dp_rx_h_rate_mcs(ab, rx_desc); + rx_info->bw = ath12k_dp_rx_h_rx_bw(ab, rx_desc); + rx_info->nss = ath12k_dp_rx_h_nss(ab, rx_desc); + rx_info->tid = ath12k_dp_rx_h_tid(ab, rx_desc); + rx_info->peer_id = ath12k_dp_rx_h_peer_id(ab, rx_desc); + rx_info->phy_meta_data = ath12k_dp_rx_h_freq(ab, rx_desc); + + if (ath12k_dp_rxdesc_mac_addr2_valid(ab, rx_desc)) { + ether_addr_copy(rx_info->addr2, + ath12k_dp_rxdesc_get_mpdu_start_addr2(ab, rx_desc)); + rx_info->addr2_present = true; + } + + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", + rx_desc, sizeof(*rx_desc)); +} + +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info) +{ + struct ieee80211_rx_status *rx_status = rx_info->rx_status; u8 channel_num; u32 center_freq, meta_data; struct ieee80211_channel *channel; @@ -2429,12 +2427,12 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; - meta_data = 
ath12k_dp_rx_h_freq(ab, rx_desc); + meta_data = rx_info->phy_meta_data; channel_num = meta_data; center_freq = meta_data >> 16; - if (center_freq >= ATH12K_MIN_6G_FREQ && - center_freq <= ATH12K_MAX_6G_FREQ) { + if (center_freq >= ATH12K_MIN_6GHZ_FREQ && + center_freq <= ATH12K_MAX_6GHZ_FREQ) { rx_status->band = NL80211_BAND_6GHZ; rx_status->freq = center_freq; } else if (channel_num >= 1 && channel_num <= 14) { @@ -2450,20 +2448,18 @@ void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, ieee80211_frequency_to_channel(channel->center_freq); } spin_unlock_bh(&ar->data_lock); - ath12k_dbg_dump(ar->ab, ATH12K_DBG_DATA, NULL, "rx_desc: ", - rx_desc, sizeof(*rx_desc)); } if (rx_status->band != NL80211_BAND_6GHZ) rx_status->freq = ieee80211_channel_to_frequency(channel_num, rx_status->band); - ath12k_dp_rx_h_rate(ar, rx_desc, rx_status); + ath12k_dp_rx_h_rate(ar, rx_info); } static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *napi, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; static const struct ieee80211_radiotap_he known = { @@ -2476,6 +2472,7 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap struct ieee80211_sta *pubsta; struct ath12k_peer *peer; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); + struct ieee80211_rx_status *status = rx_info->rx_status; u8 decap = DP_RX_DECAP_TYPE_RAW; bool is_mcbc = rxcb->is_mcbc; bool is_eapol = rxcb->is_eapol; @@ -2488,10 +2485,10 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap } if (!(status->flag & RX_FLAG_ONLY_MONITOR)) - decap = ath12k_dp_rx_h_decap_type(ab, rxcb->rx_desc); + decap = rx_info->decap_type; spin_lock_bh(&ab->base_lock); - peer = ath12k_dp_rx_h_find_peer(ab, msdu); + peer = ath12k_dp_rx_h_find_peer(ab, msdu, rx_info); pubsta = peer ? 
peer->sta : NULL; @@ -2574,7 +2571,7 @@ static bool ath12k_dp_rx_check_nwifi_hdr_len_valid(struct ath12k_base *ab, static int ath12k_dp_rx_process_msdu(struct ath12k *ar, struct sk_buff *msdu, struct sk_buff_head *msdu_list, - struct ieee80211_rx_status *rx_status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct hal_rx_desc *rx_desc, *lrx_desc; @@ -2634,10 +2631,11 @@ static int ath12k_dp_rx_process_msdu(struct ath12k *ar, goto free_out; } - ath12k_dp_rx_h_ppdu(ar, rx_desc, rx_status); - ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, rx_desc, rx_info); - rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; + rx_info->rx_status->flag |= RX_FLAG_SKIP_MONITOR | RX_FLAG_DUP_VALIDATED; return 0; @@ -2657,12 +2655,16 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, struct ath12k *ar; struct ath12k_hw_link *hw_links = ag->hw_links; struct ath12k_base *partner_ab; + struct ath12k_dp_rx_info rx_info; u8 hw_link_id, pdev_id; int ret; if (skb_queue_empty(msdu_list)) return; + rx_info.addr2_present = false; + rx_info.rx_status = &rx_status; + rcu_read_lock(); while ((msdu = __skb_dequeue(msdu_list))) { @@ -2683,7 +2685,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_status); + ret = ath12k_dp_rx_process_msdu(ar, msdu, msdu_list, &rx_info); if (ret) { ath12k_dbg(ab, ATH12K_DBG_DATA, "Unable to process msdu %d", ret); @@ -2691,7 +2693,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, continue; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_status); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } rcu_read_unlock(); @@ -2984,6 +2986,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer struct ieee80211_rx_status *rxs = 
IEEE80211_SKB_RXCB(msdu); struct ieee80211_key_conf *key_conf; struct ieee80211_hdr *hdr; + struct ath12k_dp_rx_info rx_info; u8 mic[IEEE80211_CCMP_MIC_LEN]; int head_len, tail_len, ret; size_t data_len; @@ -2994,6 +2997,9 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer if (ath12k_dp_rx_h_enctype(ab, rx_desc) != HAL_ENCRYPT_TYPE_TKIP_MIC) return 0; + rx_info.addr2_present = false; + rx_info.rx_status = rxs; + hdr = (struct ieee80211_hdr *)(msdu->data + hal_rx_desc_sz); hdr_len = ieee80211_hdrlen(hdr->frame_control); head_len = hdr_len + hal_rx_desc_sz + IEEE80211_TKIP_IV_LEN; @@ -3020,6 +3026,8 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer (ATH12K_SKB_RXCB(msdu))->is_first_msdu = true; (ATH12K_SKB_RXCB(msdu))->is_last_msdu = true; + ath12k_dp_rx_h_fetch_info(ab, rx_desc, &rx_info); + rxs->flag |= RX_FLAG_MMIC_ERROR | RX_FLAG_MMIC_STRIPPED | RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED; skb_pull(msdu, hal_rx_desc_sz); @@ -3027,7 +3035,7 @@ static int ath12k_dp_rx_h_verify_tkip_mic(struct ath12k *ar, struct ath12k_peer if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, rx_desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, rx_desc, rxs); + ath12k_dp_rx_h_ppdu(ar, &rx_info); ath12k_dp_rx_h_undecap(ar, msdu, rx_desc, HAL_ENCRYPT_TYPE_TKIP_MIC, rxs, true); ieee80211_rx(ath12k_ar_to_hw(ar), msdu); @@ -3716,7 +3724,7 @@ static void ath12k_dp_rx_null_q_desc_sg_drop(struct ath12k *ar, } static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_base *ab = ar->ab; @@ -3772,11 +3780,11 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return -EINVAL; - ath12k_dp_rx_h_ppdu(ar, desc, status); - - ath12k_dp_rx_h_mpdu(ar, msdu, desc, status); + 
ath12k_dp_rx_h_fetch_info(ab, desc, rx_info); + ath12k_dp_rx_h_ppdu(ar, rx_info); + ath12k_dp_rx_h_mpdu(ar, msdu, desc, rx_info); - rxcb->tid = ath12k_dp_rx_h_tid(ab, desc); + rxcb->tid = rx_info->tid; /* Please note that caller will having the access to msdu and completing * rx with mac80211. Need not worry about cleaning up amsdu_list. @@ -3786,7 +3794,7 @@ static int ath12k_dp_rx_h_null_q_desc(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status, + struct ath12k_dp_rx_info *rx_info, struct sk_buff_head *msdu_list) { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3796,7 +3804,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, switch (rxcb->err_code) { case HAL_REO_DEST_RING_ERROR_CODE_DESC_ADDR_ZERO: - if (ath12k_dp_rx_h_null_q_desc(ar, msdu, status, msdu_list)) + if (ath12k_dp_rx_h_null_q_desc(ar, msdu, rx_info, msdu_list)) drop = true; break; case HAL_REO_DEST_RING_ERROR_CODE_PN_CHECK_FAILED: @@ -3817,7 +3825,7 @@ static bool ath12k_dp_rx_h_reo_err(struct ath12k *ar, struct sk_buff *msdu, } static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; u16 msdu_len; @@ -3831,24 +3839,33 @@ static bool ath12k_dp_rx_h_tkip_mic_err(struct ath12k *ar, struct sk_buff *msdu, l3pad_bytes = ath12k_dp_rx_h_l3pad(ab, desc); msdu_len = ath12k_dp_rx_h_msdu_len(ab, desc); + + if ((hal_rx_desc_sz + l3pad_bytes + msdu_len) > DP_RX_BUFFER_SIZE) { + ath12k_dbg(ab, ATH12K_DBG_DATA, + "invalid msdu len in tkip mic err %u\n", msdu_len); + ath12k_dbg_dump(ab, ATH12K_DBG_DATA, NULL, "", desc, + sizeof(*desc)); + return true; + } + skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len); skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes); if (unlikely(!ath12k_dp_rx_check_nwifi_hdr_len_valid(ab, desc, msdu))) return true; - 
ath12k_dp_rx_h_ppdu(ar, desc, status); + ath12k_dp_rx_h_ppdu(ar, rx_info); - status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | - RX_FLAG_DECRYPTED); + rx_info->rx_status->flag |= (RX_FLAG_MMIC_STRIPPED | RX_FLAG_MMIC_ERROR | + RX_FLAG_DECRYPTED); ath12k_dp_rx_h_undecap(ar, msdu, desc, - HAL_ENCRYPT_TYPE_TKIP_MIC, status, false); + HAL_ENCRYPT_TYPE_TKIP_MIC, rx_info->rx_status, false); return false; } static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, - struct ieee80211_rx_status *status) + struct ath12k_dp_rx_info *rx_info) { struct ath12k_base *ab = ar->ab; struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); @@ -3863,7 +3880,8 @@ static bool ath12k_dp_rx_h_rxdma_err(struct ath12k *ar, struct sk_buff *msdu, case HAL_REO_ENTR_RING_RXDMA_ECODE_TKIP_MIC_ERR: err_bitmap = ath12k_dp_rx_h_mpdu_err(ab, rx_desc); if (err_bitmap & HAL_RX_MPDU_ERR_TKIP_MIC) { - drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, status); + ath12k_dp_rx_h_fetch_info(ab, rx_desc, rx_info); + drop = ath12k_dp_rx_h_tkip_mic_err(ar, msdu, rx_info); break; } fallthrough; @@ -3885,14 +3903,18 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, { struct ath12k_skb_rxcb *rxcb = ATH12K_SKB_RXCB(msdu); struct ieee80211_rx_status rxs = {0}; + struct ath12k_dp_rx_info rx_info; bool drop = true; + rx_info.addr2_present = false; + rx_info.rx_status = &rxs; + switch (rxcb->err_rel_src) { case HAL_WBM_REL_SRC_MODULE_REO: - drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rxs, msdu_list); + drop = ath12k_dp_rx_h_reo_err(ar, msdu, &rx_info, msdu_list); break; case HAL_WBM_REL_SRC_MODULE_RXDMA: - drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rxs); + drop = ath12k_dp_rx_h_rxdma_err(ar, msdu, &rx_info); break; default: /* msdu will get freed */ @@ -3904,7 +3926,7 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, return; } - ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rxs); + ath12k_dp_rx_deliver_msdu(ar, napi, msdu, &rx_info); } int ath12k_dp_rx_process_wbm_err(struct ath12k_base 
*ab, @@ -4480,6 +4502,8 @@ int ath12k_dp_rx_pdev_mon_attach(struct ath12k *ar) pmon->mon_last_linkdesc_paddr = 0; pmon->mon_last_buf_cookie = DP_RX_DESC_COOKIE_MAX + 1; + INIT_LIST_HEAD(&pmon->dp_rx_mon_mpdu_list); + pmon->mon_mpdu = NULL; spin_lock_init(&pmon->mon_lock); return 0; diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index 88e42365a9d8bc..a4e179c6f2664f 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -65,6 +65,24 @@ struct ath12k_dp_rx_rfc1042_hdr { __be16 snap_type; } __packed; +struct ath12k_dp_rx_info { + struct ieee80211_rx_status *rx_status; + u32 phy_meta_data; + u16 peer_id; + u8 decap_type; + u8 pkt_type; + u8 sgi; + u8 rate_mcs; + u8 bw; + u8 nss; + u8 addr2[ETH_ALEN]; + u8 tid; + bool ip_csum_fail; + bool l4_csum_fail; + bool is_mcbc; + bool addr2_present; +}; + static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi) { u32 ret = 0; @@ -131,13 +149,13 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev u8 ath12k_dp_rx_h_l3pad(struct ath12k_base *ab, struct hal_rx_desc *desc); struct ath12k_peer * -ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu); +ath12k_dp_rx_h_find_peer(struct ath12k_base *ab, struct sk_buff *msdu, + struct ath12k_dp_rx_info *rx_info); u8 ath12k_dp_rx_h_decap_type(struct ath12k_base *ab, struct hal_rx_desc *desc); u32 ath12k_dp_rx_h_mpdu_err(struct ath12k_base *ab, struct hal_rx_desc *desc); -void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct hal_rx_desc *rx_desc, - struct ieee80211_rx_status *rx_status); +void ath12k_dp_rx_h_ppdu(struct ath12k *ar, struct ath12k_dp_rx_info *rx_info); int ath12k_dp_rxdma_ring_sel_config_qcn9274(struct ath12k_base *ab); int ath12k_dp_rxdma_ring_sel_config_wcn7850(struct ath12k_base *ab); @@ -145,4 +163,9 @@ int ath12k_dp_htt_tlv_iter(struct ath12k_base *ab, const void *ptr, size_t len, int (*iter)(struct ath12k_base *ar, u16 tag, u16 len, 
const void *ptr, void *data), void *data); +void ath12k_dp_rx_h_fetch_info(struct ath12k_base *ab, struct hal_rx_desc *rx_desc, + struct ath12k_dp_rx_info *rx_info); + +int ath12k_dp_rx_crypto_mic_len(struct ath12k *ar, enum hal_encrypt_type enctype); + #endif /* ATH12K_DP_RX_H */ diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index ced232bf4aed01..f82d2c58eff3f6 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -229,7 +229,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); struct hal_tcl_data_cmd *hal_tcl_desc; struct hal_tx_msdu_ext_desc *msg; - struct sk_buff *skb_ext_desc; + struct sk_buff *skb_ext_desc = NULL; struct hal_srng *tcl_ring; struct ieee80211_hdr *hdr = (void *)skb->data; struct ath12k_vif *ahvif = arvif->ahvif; @@ -415,18 +415,15 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, if (ret < 0) { ath12k_dbg(ab, ATH12K_DBG_DP_TX, "Failed to add HTT meta data, dropping packet\n"); - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; + goto fail_free_ext_skb; } } ti.paddr = dma_map_single(ab->dev, skb_ext_desc->data, skb_ext_desc->len, DMA_TO_DEVICE); ret = dma_mapping_error(ab->dev, ti.paddr); - if (ret) { - kfree_skb(skb_ext_desc); - goto fail_unmap_dma; - } + if (ret) + goto fail_free_ext_skb; ti.data_len = skb_ext_desc->len; ti.type = HAL_TCL_DESC_TYPE_EXT_DESC; @@ -462,7 +459,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, ring_selector++; } - goto fail_unmap_dma; + goto fail_unmap_dma_ext; } ath12k_hal_tx_cmd_desc_setup(ab, hal_tcl_desc, &ti); @@ -478,13 +475,16 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_link_vif *arvif, return 0; -fail_unmap_dma: - dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); - +fail_unmap_dma_ext: if (skb_cb->paddr_ext_desc) dma_unmap_single(ab->dev, skb_cb->paddr_ext_desc, sizeof(struct 
hal_tx_msdu_ext_desc), DMA_TO_DEVICE); +fail_free_ext_skb: + kfree_skb(skb_ext_desc); + +fail_unmap_dma: + dma_unmap_single(ab->dev, ti.paddr, ti.data_len, DMA_TO_DEVICE); fail_remove_tx_buf: ath12k_dp_tx_release_txbuf(dp, tx_desc, pool_id); @@ -585,6 +585,7 @@ ath12k_dp_tx_process_htt_tx_complete(struct ath12k_base *ab, case HAL_WBM_REL_HTT_TX_COMP_STATUS_TTL: case HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ: case HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT: + case HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH: ath12k_dp_tx_free_txbuf(ab, msdu, mac_id, tx_ring); break; case HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY: diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index cd59ff8e6c7b0c..d00869a33fea06 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include #include "hal_tx.h" @@ -511,11 +511,6 @@ static void ath12k_hw_qcn9274_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -552,9 +547,9 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO_STATUS_HP; s = &hal->srng_config[HAL_TCL_DATA]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = &hal->srng_config[HAL_TCL_CMD]; @@ -566,29 +561,29 @@ static int ath12k_hal_srng_create_config_qcn9274(struct ath12k_base *ab) s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL_STATUS_RING_HP; s = &hal->srng_config[HAL_CE_SRC]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = 
&hal->srng_config[HAL_CE_DST]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -736,7 +731,6 @@ const struct hal_rx_ops hal_rx_qcn9274_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_qcn9274_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_l4_cksum_fail, 
.dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_dp_rx_h_ip_cksum_fail, @@ -975,11 +969,6 @@ ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.qcn9274_compact.mpdu_start.pn[1]); } -static u16 ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.qcn9274_compact.mpdu_start.frame_ctrl); -} - static bool ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done(struct hal_rx_desc *desc) { return !!le32_get_bits(desc->u.qcn9274_compact.msdu_end.info14, @@ -1080,8 +1069,6 @@ const struct hal_rx_ops hal_rx_qcn9274_compact_ops = { .rx_desc_is_da_mcbc = ath12k_hw_qcn9274_compact_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_qcn9274_compact_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_qcn9274_compact_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = - ath12k_hw_qcn9274_compact_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_qcn9274_compact_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_qcn9274_compact_dp_rx_h_ip_cksum_fail, @@ -1330,11 +1317,6 @@ static void ath12k_hw_wcn7850_rx_desc_get_crypto_hdr(struct hal_rx_desc *desc, crypto_hdr[7] = HAL_RX_MPDU_INFO_PN_GET_BYTE2(desc->u.wcn7850.mpdu_start.pn[1]); } -static u16 ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl(struct hal_rx_desc *desc) -{ - return __le16_to_cpu(desc->u.wcn7850.mpdu_start.frame_ctrl); -} - static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) { struct ath12k_hal *hal = &ab->hal; @@ -1371,9 +1353,9 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_TCL_DATA]; s->max_rings = 5; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_BASE_LSB(ab); s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_HP; - s->reg_size[0] = 
HAL_TCL2_RING_BASE_LSB - HAL_TCL1_RING_BASE_LSB; + s->reg_size[0] = HAL_TCL2_RING_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB(ab); s->reg_size[1] = HAL_TCL2_RING_HP - HAL_TCL1_RING_HP; s = &hal->srng_config[HAL_TCL_CMD]; @@ -1386,31 +1368,31 @@ static int ath12k_hal_srng_create_config_wcn7850(struct ath12k_base *ab) s = &hal->srng_config[HAL_CE_SRC]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG - - HAL_SEQ_WCSS_UMAC_CE0_SRC_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab); s = &hal->srng_config[HAL_CE_DST]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_CE_DST_STATUS]; s->max_rings = 12; - s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + + s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_BASE_LSB; - s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG + 
HAL_CE_DST_STATUS_RING_HP; - s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; - s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG - - HAL_SEQ_WCSS_UMAC_CE0_DST_REG; + s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP; + s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); + s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) - + HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab); s = &hal->srng_config[HAL_WBM_IDLE_LINK]; s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab); @@ -1555,7 +1537,6 @@ const struct hal_rx_ops hal_rx_wcn7850_ops = { .rx_desc_is_da_mcbc = ath12k_hw_wcn7850_rx_desc_is_da_mcbc, .rx_desc_get_dot11_hdr = ath12k_hw_wcn7850_rx_desc_get_dot11_hdr, .rx_desc_get_crypto_header = ath12k_hw_wcn7850_rx_desc_get_crypto_hdr, - .rx_desc_get_mpdu_frame_ctl = ath12k_hw_wcn7850_rx_desc_get_mpdu_frame_ctl, .dp_rx_h_msdu_done = ath12k_hw_wcn7850_dp_rx_h_msdu_done, .dp_rx_h_l4_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_l4_cksum_fail, .dp_rx_h_ip_cksum_fail = ath12k_hw_wcn7850_dp_rx_h_ip_cksum_fail, @@ -1756,7 +1737,7 @@ static void ath12k_hal_srng_src_hw_init(struct ath12k_base *ab, HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB) | u32_encode_bits((srng->entry_size * srng->num_entries), HAL_TCL1_RING_BASE_MSB_RING_SIZE); - ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET, val); + ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET(ab), val); val = u32_encode_bits(srng->entry_size, HAL_REO1_RING_ID_ENTRY_SIZE); ath12k_hif_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET(ab), val); diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 94e2e873595831..c8205672cd3dd5 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. 
- * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH12K_HAL_H @@ -44,10 +44,14 @@ struct ath12k_base; #define HAL_SEQ_WCSS_UMAC_OFFSET 0x00a00000 #define HAL_SEQ_WCSS_UMAC_REO_REG 0x00a38000 #define HAL_SEQ_WCSS_UMAC_TCL_REG 0x00a44000 -#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG 0x01b80000 -#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG 0x01b81000 -#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG 0x01b82000 -#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG 0x01b83000 +#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce0_dest_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_src_reg_base) +#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) \ + ((ab)->hw_params->regs->hal_umac_ce1_dest_reg_base) #define HAL_SEQ_WCSS_UMAC_WBM_REG 0x00a34000 #define HAL_CE_WFSS_CE_REG_BASE 0x01b80000 @@ -57,8 +61,10 @@ struct ath12k_base; /* SW2TCL(x) R0 ring configuration address */ #define HAL_TCL1_RING_CMN_CTRL_REG 0x00000020 #define HAL_TCL1_RING_DSCP_TID_MAP 0x00000240 -#define HAL_TCL1_RING_BASE_LSB 0x00000900 -#define HAL_TCL1_RING_BASE_MSB 0x00000904 +#define HAL_TCL1_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_lsb) +#define HAL_TCL1_RING_BASE_MSB(ab) \ + ((ab)->hw_params->regs->hal_tcl1_ring_base_msb) #define HAL_TCL1_RING_ID(ab) ((ab)->hw_params->regs->hal_tcl1_ring_id) #define HAL_TCL1_RING_MISC(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_misc) @@ -76,30 +82,31 @@ struct ath12k_base; ((ab)->hw_params->regs->hal_tcl1_ring_msi1_base_msb) #define HAL_TCL1_RING_MSI1_DATA(ab) \ ((ab)->hw_params->regs->hal_tcl1_ring_msi1_data) -#define HAL_TCL2_RING_BASE_LSB 0x00000978 +#define HAL_TCL2_RING_BASE_LSB(ab) \ + ((ab)->hw_params->regs->hal_tcl2_ring_base_lsb) #define HAL_TCL_RING_BASE_LSB(ab) \ 
((ab)->hw_params->regs->hal_tcl_ring_base_lsb) -#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_BASE_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) \ - (HAL_TCL1_RING_MSI1_DATA(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_BASE_MSB_OFFSET \ - (HAL_TCL1_RING_BASE_MSB - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_ID_OFFSET(ab) \ - (HAL_TCL1_RING_ID(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) \ - (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_LSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) \ - (HAL_TCL1_RING_TP_ADDR_MSB(ab) - HAL_TCL1_RING_BASE_LSB) -#define HAL_TCL1_RING_MISC_OFFSET(ab) \ - (HAL_TCL1_RING_MISC(ab) - HAL_TCL1_RING_BASE_LSB) +#define HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MSI1_DATA_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MSI1_DATA(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_BASE_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_BASE_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_ID_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_ID(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX0(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define 
HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_CONSUMER_INT_SETUP_IX1(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_LSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_LSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_TP_ADDR_MSB_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_TP_ADDR_MSB(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) +#define HAL_TCL1_RING_MISC_OFFSET(ab) ({ typeof(ab) _ab = (ab); \ + (HAL_TCL1_RING_MISC(_ab) - HAL_TCL1_RING_BASE_LSB(_ab)); }) /* SW2TCL(x) R2 ring pointers (head/tail) address */ #define HAL_TCL1_RING_HP 0x00002000 @@ -1068,7 +1075,6 @@ struct hal_rx_ops { bool (*rx_desc_is_da_mcbc)(struct hal_rx_desc *desc); void (*rx_desc_get_dot11_hdr)(struct hal_rx_desc *desc, struct ieee80211_hdr *hdr); - u16 (*rx_desc_get_mpdu_frame_ctl)(struct hal_rx_desc *desc); void (*rx_desc_get_crypto_header)(struct hal_rx_desc *desc, u8 *crypto_hdr, enum hal_encrypt_type enctype); diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index 3e8983b85de863..63d279fab32249 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022, 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "core.h" @@ -1298,6 +1298,7 @@ enum hal_wbm_htt_tx_comp_status { HAL_WBM_REL_HTT_TX_COMP_STATUS_REINJ, HAL_WBM_REL_HTT_TX_COMP_STATUS_INSPECT, HAL_WBM_REL_HTT_TX_COMP_STATUS_MEC_NOTIFY, + HAL_WBM_REL_HTT_TX_COMP_STATUS_VDEVID_MISMATCH, HAL_WBM_REL_HTT_TX_COMP_STATUS_MAX, }; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 6bdcd0867d86e3..c753eb2a03ad24 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -108,11 +108,12 @@ enum hal_rx_mon_status { HAL_RX_MON_STATUS_PPDU_DONE, HAL_RX_MON_STATUS_BUF_DONE, HAL_RX_MON_STATUS_BUF_ADDR, + HAL_RX_MON_STATUS_MPDU_START, HAL_RX_MON_STATUS_MPDU_END, HAL_RX_MON_STATUS_MSDU_END, }; -#define HAL_RX_MAX_MPDU 256 +#define HAL_RX_MAX_MPDU 1024 #define HAL_RX_NUM_WORDS_PER_PPDU_BITMAP (HAL_RX_MAX_MPDU >> 5) struct hal_rx_user_status { @@ -506,6 +507,18 @@ struct hal_rx_mpdu_start { __le32 rsvd2[16]; } __packed; +struct hal_rx_msdu_end { + __le32 info0; + __le32 rsvd0[9]; + __le16 info00; + __le16 info01; + __le32 rsvd00[8]; + __le32 info1; + __le32 rsvd1[10]; + __le32 info2; + __le32 rsvd2; +} __packed; + #define HAL_RX_PPDU_END_DURATION GENMASK(23, 0) struct hal_rx_ppdu_end_duration { __le32 rsvd0[9]; diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index a106ebed7870de..1bfb11bae7add3 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include @@ -619,6 +619,9 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -681,6 +684,14 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -695,6 +706,9 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -761,6 +775,14 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000aa0, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs wcn7850_regs = { @@ -775,6 +797,9 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_tcl1_ring_msi1_base_msb = 0x0000094c, .hal_tcl1_ring_msi1_data = 0x00000950, .hal_tcl_ring_base_lsb = 0x00000b58, + .hal_tcl1_ring_base_lsb = 0x00000900, + .hal_tcl1_ring_base_msb = 0x00000904, + .hal_tcl2_ring_base_lsb = 0x00000978, /* TCL STATUS 
ring address */ .hal_tcl_status_ring_base_lsb = 0x00000d38, @@ -837,6 +862,14 @@ static const struct ath12k_hw_regs wcn7850_regs = { /* REO status ring address */ .hal_reo_status_ring_base = 0x00000a84, + + /* CE base address */ + .hal_umac_ce0_src_reg_base = 0x01b80000, + .hal_umac_ce0_dest_reg_base = 0x01b81000, + .hal_umac_ce1_src_reg_base = 0x01b82000, + .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e40304, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 8d52182e28aef4..862b11325a9021 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #ifndef ATH12K_HW_H @@ -293,6 +293,9 @@ struct ath12k_hw_regs { u32 hal_tcl1_ring_msi1_base_msb; u32 hal_tcl1_ring_msi1_data; u32 hal_tcl_ring_base_lsb; + u32 hal_tcl1_ring_base_lsb; + u32 hal_tcl1_ring_base_msb; + u32 hal_tcl2_ring_base_lsb; u32 hal_tcl_status_ring_base_lsb; @@ -316,6 +319,11 @@ struct ath12k_hw_regs { u32 pcie_qserdes_sysclk_en_sel; u32 pcie_pcs_osc_dtct_config_base; + u32 hal_umac_ce0_src_reg_base; + u32 hal_umac_ce0_dest_reg_base; + u32 hal_umac_ce1_src_reg_base; + u32 hal_umac_ce1_dest_reg_base; + u32 hal_ppe_rel_ring_base; u32 hal_reo2_ring_base; @@ -347,6 +355,8 @@ struct ath12k_hw_regs { u32 hal_reo_cmd_ring_base; u32 hal_reo_status_ring_base; + + u32 gcc_gcc_pcie_hot_rst; }; static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index dfa05f0ee6c9f7..331bcf5e6c4cce 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -229,7 +229,8 @@ ath12k_phymodes[NUM_NL80211_BANDS][ATH12K_CHAN_WIDTH_NUM] = { const struct htt_rx_ring_tlv_filter ath12k_mac_mon_status_filter_default = { .rx_filter = HTT_RX_FILTER_TLV_FLAGS_MPDU_START | HTT_RX_FILTER_TLV_FLAGS_PPDU_END | - HTT_RX_FILTER_TLV_FLAGS_PPDU_END_STATUS_DONE, + HTT_RX_FILTER_TLV_FLAGS_PPDU_END_STATUS_DONE | + HTT_RX_FILTER_TLV_FLAGS_PPDU_START_USER_INFO, .pkt_filter_flags0 = HTT_RX_FP_MGMT_FILTER_FLAGS0, .pkt_filter_flags1 = HTT_RX_FP_MGMT_FILTER_FLAGS1, .pkt_filter_flags2 = HTT_RX_FP_CTRL_FILTER_FLASG2, @@ -874,12 +875,12 @@ static bool ath12k_mac_band_match(enum nl80211_band band1, enum WMI_HOST_WLAN_BA { switch (band1) { case NL80211_BAND_2GHZ: - if (band2 & WMI_HOST_WLAN_2G_CAP) + if (band2 & WMI_HOST_WLAN_2GHZ_CAP) return true; break; case NL80211_BAND_5GHZ: case NL80211_BAND_6GHZ: - if (band2 & WMI_HOST_WLAN_5G_CAP) + if (band2 & WMI_HOST_WLAN_5GHZ_CAP) return true; break; default: @@ -980,7 +981,7 @@ static int 
ath12k_mac_txpower_recalc(struct ath12k *ar) ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "txpower to set in hw %d\n", txpower / 2); - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) && ar->txpower_limit_2g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT2G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -990,7 +991,7 @@ static int ath12k_mac_txpower_recalc(struct ath12k *ar) ar->txpower_limit_2g = txpower; } - if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) && + if ((pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) && ar->txpower_limit_5g != txpower) { param = WMI_PDEV_PARAM_TXPOWER_LIMIT5G; ret = ath12k_wmi_pdev_set_param(ar, param, @@ -1272,12 +1273,12 @@ static int ath12k_mac_monitor_vdev_create(struct ath12k *ar) arg.pdev_id = pdev->pdev_id; arg.if_stats_id = ATH12K_INVAL_VDEV_STATS_ID; - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { arg.chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; arg.chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { arg.chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; arg.chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; } @@ -3988,7 +3989,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, else rateidx = ffs(info->basic_rates) - 1; - if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) + if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) rateidx += ATH12K_MAC_FIRST_OFDM_RATE_IDX; bitrate = ath12k_legacy_rates[rateidx].bitrate; @@ -4162,9 +4163,9 @@ ath12k_mac_select_scan_device(struct ieee80211_hw *hw, * split the hw request and perform multiple scans */ - if (center_freq < ATH12K_MIN_5G_FREQ) + if (center_freq < ATH12K_MIN_5GHZ_FREQ) band = NL80211_BAND_2GHZ; - else if (center_freq < ATH12K_MIN_6G_FREQ) + else if (center_freq < ATH12K_MIN_6GHZ_FREQ) 
band = NL80211_BAND_5GHZ; else band = NL80211_BAND_6GHZ; @@ -4605,7 +4606,6 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, .macaddr = macaddr, }; struct ath12k_vif *ahvif = arvif->ahvif; - struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4624,8 +4624,8 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, switch (key->cipher) { case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: arg.key_cipher = WMI_CIPHER_AES_CCM; - /* TODO: Re-check if flag is valid */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; case WLAN_CIPHER_SUITE_TKIP: @@ -4633,12 +4633,10 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, arg.key_txmic_len = 8; arg.key_rxmic_len = 8; break; - case WLAN_CIPHER_SUITE_CCMP_256: - arg.key_cipher = WMI_CIPHER_AES_CCM; - break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: arg.key_cipher = WMI_CIPHER_AES_GCM; + key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV_MGMT; break; default: ath12k_warn(ar->ab, "cipher %d is not supported\n", key->cipher); @@ -4658,7 +4656,7 @@ static int ath12k_install_key(struct ath12k_link_vif *arvif, if (!wait_for_completion_timeout(&ar->install_key_done, 1 * HZ)) return -ETIMEDOUT; - if (ether_addr_equal(macaddr, vif->addr)) + if (ether_addr_equal(macaddr, arvif->bssid)) ahvif->key_cipher = key->cipher; return ar->install_key_status ? 
-EINVAL : 0; @@ -6475,7 +6473,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_tx_chainmask = ar->cfg_tx_chainmask >> cap->tx_chain_mask_shift; rate_cap_rx_chainmask = ar->cfg_rx_chainmask >> cap->rx_chain_mask_shift; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = &ar->mac.sbands[NL80211_BAND_2GHZ]; ht_cap = cap->band[NL80211_BAND_2GHZ].ht_cap_info; if (ht_cap_info) @@ -6484,7 +6482,7 @@ static void ath12k_mac_setup_ht_vht_cap(struct ath12k *ar, rate_cap_rx_chainmask); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && (ar->ab->hw_params->single_pdev_only || !ar->supports_6ghz)) { band = &ar->mac.sbands[NL80211_BAND_5GHZ]; @@ -6893,7 +6891,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, enum nl80211_band band; int count; - if (cap->supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { band = NL80211_BAND_2GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6903,7 +6901,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { band = NL80211_BAND_5GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, ar->mac.iftype[band], @@ -6913,7 +6911,7 @@ static void ath12k_mac_setup_sband_iftype_data(struct ath12k *ar, count); } - if (cap->supported_bands & WMI_HOST_WLAN_5G_CAP && + if (cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { band = NL80211_BAND_6GHZ; count = ath12k_mac_copy_sband_iftype_data(ar, cap, @@ -7042,6 +7040,8 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv struct ath12k_base *ab = ar->ab; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_tx_info *info; + enum hal_encrypt_type enctype; + unsigned int 
mic_len; dma_addr_t paddr; int buf_id; int ret; @@ -7057,12 +7057,16 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv return -ENOSPC; info = IEEE80211_SKB_CB(skb); - if (!(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { + if ((ATH12K_SKB_CB(skb)->flags & ATH12K_SKB_CIPHER_SET) && + !(info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)) { if ((ieee80211_is_action(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control) || ieee80211_is_disassoc(hdr->frame_control)) && ieee80211_has_protected(hdr->frame_control)) { - skb_put(skb, IEEE80211_CCMP_MIC_LEN); + enctype = + ath12k_dp_tx_get_encrypt_type(ATH12K_SKB_CB(skb)->cipher); + mic_len = ath12k_dp_rx_crypto_mic_len(ar, enctype); + skb_put(skb, mic_len); } } @@ -7429,7 +7433,6 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, info_flags); skb_cb = ATH12K_SKB_CB(msdu_copied); - info = IEEE80211_SKB_CB(msdu_copied); skb_cb->link_id = link_id; /* For open mode, skip peer find logic */ @@ -7452,7 +7455,6 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, if (key) { skb_cb->cipher = key->cipher; skb_cb->flags |= ATH12K_SKB_CIPHER_SET; - info->control.hw_key = key; hdr = (struct ieee80211_hdr *)msdu_copied->data; if (!ieee80211_has_protected(hdr->frame_control)) @@ -7903,15 +7905,15 @@ static int ath12k_mac_setup_vdev_create_arg(struct ath12k_link_vif *arvif, return ret; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { arg->chains[NL80211_BAND_2GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_2GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) { + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { arg->chains[NL80211_BAND_5GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_5GHZ].rx = ar->num_rx_chains; } - if (pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP && + if (pdev->cap.supported_bands & WMI_HOST_WLAN_5GHZ_CAP && ar->supports_6ghz) { 
arg->chains[NL80211_BAND_6GHZ].tx = ar->num_tx_chains; arg->chains[NL80211_BAND_6GHZ].rx = ar->num_rx_chains; @@ -7940,7 +7942,7 @@ ath12k_mac_prepare_he_mode(struct ath12k_pdev *pdev, u32 viftype) u32 *hecap_phy_ptr = NULL; u32 hemode; - if (pdev->cap.supported_bands & WMI_HOST_WLAN_2G_CAP) + if (pdev->cap.supported_bands & WMI_HOST_WLAN_2GHZ_CAP) cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; else cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; @@ -9462,8 +9464,8 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, ar = ath12k_mac_assign_vif_to_vdev(hw, arvif, ctx); if (!ar) { - ath12k_warn(arvif->ar->ab, "failed to assign chanctx for vif %pM link id %u link vif is already started", - vif->addr, link_id); + ath12k_hw_warn(ah, "failed to assign chanctx for vif %pM link id %u link vif is already started", + vif->addr, link_id); return -EINVAL; } @@ -10640,10 +10642,10 @@ static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band) struct ath12k_pdev *pdev = ar->pdev; struct ath12k_pdev_cap *pdev_cap = &pdev->cap; - if (band == WMI_HOST_WLAN_2G_CAP) + if (band == WMI_HOST_WLAN_2GHZ_CAP) return pdev_cap->band[NL80211_BAND_2GHZ].phy_id; - if (band == WMI_HOST_WLAN_5G_CAP) + if (band == WMI_HOST_WLAN_5GHZ_CAP) return pdev_cap->band[NL80211_BAND_5GHZ].phy_id; ath12k_warn(ar->ab, "unsupported phy cap:%d\n", band); @@ -10668,7 +10670,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, reg_cap = &ar->ab->hal_reg_cap[ar->pdev_idx]; - if (supported_bands & WMI_HOST_WLAN_2G_CAP) { + if (supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { channels = kmemdup(ath12k_2ghz_channels, sizeof(ath12k_2ghz_channels), GFP_KERNEL); @@ -10684,7 +10686,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, bands[NL80211_BAND_2GHZ] = band; if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2G_CAP); + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_2GHZ_CAP); reg_cap = &ar->ab->hal_reg_cap[phy_id]; } ath12k_mac_update_ch_list(ar, 
band, @@ -10692,8 +10694,8 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, reg_cap->high_2ghz_chan); } - if (supported_bands & WMI_HOST_WLAN_5G_CAP) { - if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6G_FREQ) { + if (supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + if (reg_cap->high_5ghz_chan >= ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_6ghz_channels, sizeof(ath12k_6ghz_channels), GFP_KERNEL); if (!channels) { @@ -10715,7 +10717,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, ah->use_6ghz_regd = true; } - if (reg_cap->low_5ghz_chan < ATH12K_MIN_6G_FREQ) { + if (reg_cap->low_5ghz_chan < ATH12K_MIN_6GHZ_FREQ) { channels = kmemdup(ath12k_5ghz_channels, sizeof(ath12k_5ghz_channels), GFP_KERNEL); @@ -10734,7 +10736,7 @@ static int ath12k_mac_setup_channels_rates(struct ath12k *ar, bands[NL80211_BAND_5GHZ] = band; if (ar->ab->hw_params->single_pdev_only) { - phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5G_CAP); + phy_id = ath12k_get_phy_id(ar, WMI_HOST_WLAN_5GHZ_CAP); reg_cap = &ar->ab->hal_reg_cap[phy_id]; } @@ -11572,7 +11574,6 @@ void ath12k_mac_mlo_teardown(struct ath12k_hw_group *ag) int ath12k_mac_register(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; int ret; @@ -11585,8 +11586,6 @@ int ath12k_mac_register(struct ath12k_hw_group *ag) goto err; } - set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - return 0; err: @@ -11603,12 +11602,9 @@ int ath12k_mac_register(struct ath12k_hw_group *ag) void ath12k_mac_unregister(struct ath12k_hw_group *ag) { - struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; - clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - for (i = ag->num_hw - 1; i >= 0; i--) { ah = ath12k_ag_to_ah(ag, i); if (!ah) diff --git a/drivers/net/wireless/ath/ath12k/mhi.c b/drivers/net/wireless/ath/ath12k/mhi.c index 2f6d14382ed70c..4d40c4ec4b8110 100644 --- a/drivers/net/wireless/ath/ath12k/mhi.c +++ b/drivers/net/wireless/ath/ath12k/mhi.c @@ -1,7 +1,7 @@ // 
SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2020-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -285,8 +285,11 @@ static void ath12k_mhi_op_status_cb(struct mhi_controller *mhi_cntrl, break; } - if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) + if (!(test_bit(ATH12K_FLAG_UNREGISTERING, &ab->dev_flags))) { + set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); + set_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); queue_work(ab->workqueue_aux, &ab->reset_work); + } break; default: break; diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index b474696ac6d8c9..2e7d302ace679d 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab) ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); val |= GCC_GCC_PCIE_HOT_RST_VAL; - ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val); @@ -1710,12 +1710,12 @@ static int ath12k_pci_probe(struct pci_dev *pdev, err_mhi_unregister: ath12k_mhi_unregister(ab_pci); -err_pci_msi_free: - ath12k_pci_msi_free(ab_pci); - err_irq_affinity_cleanup: ath12k_pci_set_irq_affinity_hint(ab_pci, NULL); +err_pci_msi_free: + ath12k_pci_msi_free(ab_pci); + err_pci_free_region: ath12k_pci_free_region(ab_pci); @@ -1734,8 +1734,6 @@ static void ath12k_pci_remove(struct pci_dev *pdev) if (test_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags)) { ath12k_pci_power_down(ab, 
false); - ath12k_qmi_deinit_service(ab); - ath12k_core_hw_group_unassign(ab); goto qmi_fail; } @@ -1743,9 +1741,10 @@ static void ath12k_pci_remove(struct pci_dev *pdev) cancel_work_sync(&ab->reset_work); cancel_work_sync(&ab->dump_work); - ath12k_core_deinit(ab); + ath12k_core_hw_group_cleanup(ab->ag); qmi_fail: + ath12k_core_deinit(ab); ath12k_fw_unmap(ab); ath12k_mhi_unregister(ab_pci); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 31584a7ad80eb9..9321674eef8b8f 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -28,7 +28,9 @@ #define PCIE_PCIE_PARF_LTSSM 0x1e081b0 #define PARM_LTSSM_VALUE 0x111 -#define GCC_GCC_PCIE_HOT_RST 0x1e38338 +#define GCC_GCC_PCIE_HOT_RST(ab) \ + ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst) + #define GCC_GCC_PCIE_HOT_RST_VAL 0x10 #define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228 diff --git a/drivers/net/wireless/ath/ath12k/reg.c b/drivers/net/wireless/ath/ath12k/reg.c index 439d61f284d892..7fa7cd301b7579 100644 --- a/drivers/net/wireless/ath/ath12k/reg.c +++ b/drivers/net/wireless/ath/ath12k/reg.c @@ -777,8 +777,12 @@ void ath12k_reg_free(struct ath12k_base *ab) { int i; + mutex_lock(&ab->core_lock); for (i = 0; i < ab->hw_params->max_radios; i++) { kfree(ab->default_regd[i]); kfree(ab->new_regd[i]); + ab->default_regd[i] = NULL; + ab->new_regd[i] = NULL; } + mutex_unlock(&ab->core_lock); } diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 6d1ea5f3a791b0..fe50c3d3cb8201 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -520,10 +520,10 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, * band to band for a single radio, need to see how this should be * handled. 
*/ - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { pdev_cap->tx_chain_mask = le32_to_cpu(mac_caps->tx_chain_mask_2g); pdev_cap->rx_chain_mask = le32_to_cpu(mac_caps->rx_chain_mask_2g); - } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + } else if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { pdev_cap->vht_cap = le32_to_cpu(mac_caps->vht_cap_info_5g); pdev_cap->vht_mcs = le32_to_cpu(mac_caps->vht_supp_mcs_5g); pdev_cap->he_mcs = le32_to_cpu(mac_caps->he_supp_mcs_5g); @@ -546,7 +546,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, pdev_cap->rx_chain_mask_shift = find_first_bit((unsigned long *)&pdev_cap->rx_chain_mask, 32); - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_2GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_2GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = le32_to_cpu(mac_caps->max_bw_supported_2g); @@ -566,7 +566,7 @@ ath12k_pull_mac_phy_cap_svc_ready_ext(struct ath12k_wmi_pdev *wmi_handle, le32_to_cpu(mac_caps->he_ppet2g.ppet16_ppet8_ru3_ru0[i]); } - if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5G_CAP) { + if (le32_to_cpu(mac_caps->supported_bands) & WMI_HOST_WLAN_5GHZ_CAP) { cap_band = &pdev_cap->band[NL80211_BAND_5GHZ]; cap_band->phy_id = le32_to_cpu(mac_caps->phy_id); cap_band->max_bw_supported = @@ -2351,7 +2351,7 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, for (i = 0; i < arg->peer_eht_mcs_count; i++) { eht_mcs = ptr; - eht_mcs->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_HE_RATE_SET, + eht_mcs->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_EHT_RATE_SET, sizeof(*eht_mcs)); eht_mcs->rx_mcs_set = cpu_to_le32(arg->peer_eht_rx_mcs_set[i]); @@ -3646,15 +3646,15 @@ ath12k_fill_band_to_mac_param(struct ath12k_base *soc, arg[i].pdev_id = 
pdev->pdev_id; switch (pdev->cap.supported_bands) { - case WMI_HOST_WLAN_2G_5G_CAP: + case WMI_HOST_WLAN_2GHZ_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; - case WMI_HOST_WLAN_2G_CAP: + case WMI_HOST_WLAN_2GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_2ghz_chan; arg[i].end_freq = hal_reg_cap->high_2ghz_chan; break; - case WMI_HOST_WLAN_5G_CAP: + case WMI_HOST_WLAN_5GHZ_CAP: arg[i].start_freq = hal_reg_cap->low_5ghz_chan; arg[i].end_freq = hal_reg_cap->high_5ghz_chan; break; @@ -4601,6 +4601,7 @@ static int ath12k_service_ready_ext_event(struct ath12k_base *ab, return 0; err: + kfree(svc_rdy_ext.mac_phy_caps); ath12k_wmi_free_dbring_caps(ab); return ret; } @@ -4699,7 +4700,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, bands = pdev->cap.supported_bands; } - if (bands & WMI_HOST_WLAN_2G_CAP) { + if (bands & WMI_HOST_WLAN_2GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_2GHZ, caps->eht_cap_mac_info_2ghz, caps->eht_cap_phy_info_2ghz, @@ -4708,7 +4709,7 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, caps->eht_cap_info_internal); } - if (bands & WMI_HOST_WLAN_5G_CAP) { + if (bands & WMI_HOST_WLAN_5GHZ_CAP) { ath12k_wmi_eht_caps_parse(pdev, NL80211_BAND_5GHZ, caps->eht_cap_mac_info_5ghz, caps->eht_cap_phy_info_5ghz, @@ -4922,7 +4923,7 @@ static u8 ath12k_wmi_ignore_num_extra_rules(struct ath12k_wmi_reg_rule_ext_param for (count = 0; count < num_reg_rules; count++) { start_freq = le32_get_bits(rule[count].freq_info, REG_RULE_START_FREQ); - if (start_freq >= ATH12K_MIN_6G_FREQ) + if (start_freq >= ATH12K_MIN_6GHZ_FREQ) num_invalid_5ghz_rules++; } @@ -4992,9 +4993,9 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { num_6g_reg_rules_ap[i] = reg_info->num_6g_reg_rules_ap[i]; - if (num_6g_reg_rules_ap[i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_ap[i] > MAX_6GHZ_REG_RULES) { 
ath12k_warn(ab, "Num 6G reg rules for AP mode(%d) exceeds max limit (num_6g_reg_rules_ap: %d, max_rules: %d)\n", - i, num_6g_reg_rules_ap[i], MAX_6G_REG_RULES); + i, num_6g_reg_rules_ap[i], MAX_6GHZ_REG_RULES); kfree(tb); return -EINVAL; } @@ -5015,9 +5016,9 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, reg_info->num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; total_reg_rules += num_6g_reg_rules_cl[WMI_REG_VLP_AP][i]; - if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6G_REG_RULES || - num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6G_REG_RULES) { + if (num_6g_reg_rules_cl[WMI_REG_INDOOR_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_STD_POWER_AP][i] > MAX_6GHZ_REG_RULES || + num_6g_reg_rules_cl[WMI_REG_VLP_AP][i] > MAX_6GHZ_REG_RULES) { ath12k_warn(ab, "Num 6g client reg rules exceeds max limit, for client(type: %d)\n", i); kfree(tb); @@ -6317,13 +6318,13 @@ static void ath12k_mgmt_rx_event(struct ath12k_base *ab, struct sk_buff *skb) if (rx_ev.status & WMI_RX_STATUS_ERR_MIC) status->flag |= RX_FLAG_MMIC_ERROR; - if (rx_ev.chan_freq >= ATH12K_MIN_6G_FREQ && - rx_ev.chan_freq <= ATH12K_MAX_6G_FREQ) { + if (rx_ev.chan_freq >= ATH12K_MIN_6GHZ_FREQ && + rx_ev.chan_freq <= ATH12K_MAX_6GHZ_FREQ) { status->band = NL80211_BAND_6GHZ; status->freq = rx_ev.chan_freq; } else if (rx_ev.channel >= 1 && rx_ev.channel <= 14) { status->band = NL80211_BAND_2GHZ; - } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5G_CHAN) { + } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH12K_MAX_5GHZ_CHAN) { status->band = NL80211_BAND_5GHZ; } else { /* Shouldn't happen unless list of advertised channels to diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 1ba33e30ddd279..be4ac91dd34f50 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -216,9 +216,9 @@ enum 
wmi_host_hw_mode_priority { }; enum WMI_HOST_WLAN_BAND { - WMI_HOST_WLAN_2G_CAP = 1, - WMI_HOST_WLAN_5G_CAP = 2, - WMI_HOST_WLAN_2G_5G_CAP = 3, + WMI_HOST_WLAN_2GHZ_CAP = 1, + WMI_HOST_WLAN_5GHZ_CAP = 2, + WMI_HOST_WLAN_2GHZ_5GHZ_CAP = 3, }; enum wmi_cmd_group { @@ -2690,8 +2690,8 @@ enum wmi_channel_width { * 2 - index for 160 MHz, first 3 bytes valid * 3 - index for 320 MHz, first 3 bytes valid */ -#define WMI_MAX_EHT_SUPP_MCS_2G_SIZE 2 -#define WMI_MAX_EHT_SUPP_MCS_5G_SIZE 4 +#define WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE 2 +#define WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE 4 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_80 0 #define WMI_EHTCAP_TXRX_MCS_NSS_IDX_160 1 @@ -2730,8 +2730,8 @@ struct ath12k_wmi_caps_ext_params { struct ath12k_wmi_ppe_threshold_params eht_ppet_2ghz; struct ath12k_wmi_ppe_threshold_params eht_ppet_5ghz; __le32 eht_cap_info_internal; - __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2G_SIZE]; - __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5G_SIZE]; + __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2GHZ_SIZE]; + __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5GHZ_SIZE]; __le32 eml_capability; __le32 mld_capability; } __packed; @@ -4108,7 +4108,7 @@ struct ath12k_wmi_eht_rate_set_params { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 -#define MAX_6G_REG_RULES 5 +#define MAX_6GHZ_REG_RULES 5 enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 547634f82183d6..81fa7cbad89213 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -290,6 +290,9 @@ void ath9k_htc_swba(struct ath9k_htc_priv *priv, struct ath_common *common = ath9k_hw_common(priv->ah); int slot; + if (!priv->cur_beacon_conf.enable_beacon) + return; + if (swba->beacon_pending != 0) { priv->beacon.bmisscnt++; if (priv->beacon.bmisscnt > BSTUCK_THRESHOLD) { diff --git a/drivers/net/wireless/ath/carl9170/fw.c 
b/drivers/net/wireless/ath/carl9170/fw.c index 4c1aecd1163cee..419f5530f885c2 100644 --- a/drivers/net/wireless/ath/carl9170/fw.c +++ b/drivers/net/wireless/ath/carl9170/fw.c @@ -15,7 +15,7 @@ #include "fwcmd.h" #include "version.h" -static const u8 otus_magic[4] = { OTUS_MAGIC }; +static const u8 otus_magic[4] __nonstring = { OTUS_MAGIC }; static const void *carl9170_fw_find_desc(struct ar9170 *ar, const u8 descid[4], const unsigned int len, const u8 compatible_revision) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 4f01189b7c4b9c..6842c2b02b393f 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2552,7 +2552,7 @@ static void at76_disconnect(struct usb_interface *interface) wiphy_info(priv->hw->wiphy, "disconnecting\n"); at76_delete_device(priv); - usb_put_dev(priv->udev); + usb_put_dev(interface_to_usbdev(interface)); dev_info(&interface->dev, "disconnected\n"); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index cfcf01eb0daa54..f26e4679e4ff02 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -561,8 +561,10 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev, if (!found) { /* No platform data for this device, try OF and DMI data */ brcmf_dmi_probe(settings, chip, chiprev); - if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) + if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) { + kfree(settings); return ERR_PTR(-EPROBE_DEFER); + } brcmf_acpi_probe(dev, bus_type, settings); } return settings; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 2821c27f317ee0..d06c724f63d9c6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ 
b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -896,14 +896,16 @@ brcmf_usb_dl_writeimage(struct brcmf_usbdev_info *devinfo, u8 *fw, int fwlen) } /* 1) Prepare USB boot loader for runtime image */ - brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + err = brcmf_usb_dl_cmd(devinfo, DL_START, &state, sizeof(state)); + if (err) + goto fail; rdlstate = le32_to_cpu(state.state); rdlbytes = le32_to_cpu(state.bytes); /* 2) Check we are in the Waiting state */ if (rdlstate != DL_WAITING) { - brcmf_err("Failed to DL_START\n"); + brcmf_err("Invalid DL state: %u\n", rdlstate); err = -EINVAL; goto fail; } diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c index 670031fd60dc9f..59af36960f9c54 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c @@ -142,8 +142,6 @@ const struct iwl_cfg_trans_params iwl_sc_trans_cfg = { .ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US, }; -const char iwl_sp_name[] = "Intel(R) Wi-Fi 7 BE213 160MHz"; - const struct iwl_cfg iwl_cfg_sc = { .fw_name_mac = "sc", IWL_DEVICE_SC, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index b9bd89bfdd7480..acafee538b8ab0 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -2,7 +2,7 @@ /* * Copyright (C) 2005-2014, 2018-2021 Intel Corporation * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #ifndef __IWL_CONFIG_H__ #define __IWL_CONFIG_H__ @@ -451,8 +451,11 @@ struct iwl_cfg { #define IWL_CFG_RF_ID_HR 0x7 #define IWL_CFG_RF_ID_HR1 0x4 -#define IWL_CFG_BW_NO_LIM (U16_MAX - 1) -#define IWL_CFG_BW_ANY U16_MAX +#define IWL_CFG_NO_160 0x1 +#define IWL_CFG_160 0x0 + +#define IWL_CFG_NO_320 0x1 +#define IWL_CFG_320 0x0 #define IWL_CFG_CORES_BT 0x0 #define IWL_CFG_CORES_BT_GNSS 
0x5 @@ -464,7 +467,7 @@ struct iwl_cfg { #define IWL_CFG_IS_JACKET 0x1 #define IWL_SUBDEVICE_RF_ID(subdevice) ((u16)((subdevice) & 0x00F0) >> 4) -#define IWL_SUBDEVICE_BW_LIM(subdevice) ((u16)((subdevice) & 0x0200) >> 9) +#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0200) >> 9) #define IWL_SUBDEVICE_CORES(subdevice) ((u16)((subdevice) & 0x1C00) >> 10) struct iwl_dev_info { @@ -472,10 +475,10 @@ struct iwl_dev_info { u16 subdevice; u16 mac_type; u16 rf_type; - u16 bw_limit; u8 mac_step; u8 rf_step; u8 rf_id; + u8 no_160; u8 cores; u8 cdb; u8 jacket; @@ -489,7 +492,7 @@ extern const unsigned int iwl_dev_info_table_size; const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step); + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step); extern const struct pci_device_id iwl_hw_card_ids[]; #endif @@ -550,7 +553,6 @@ extern const char iwl_ax231_name[]; extern const char iwl_ax411_name[]; extern const char iwl_fm_name[]; extern const char iwl_wh_name[]; -extern const char iwl_sp_name[]; extern const char iwl_gl_name[]; extern const char iwl_mtp_name[]; extern const char iwl_dr_name[]; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index be9e464c9b7b08..3ff493e920d284 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -148,6 +148,7 @@ * during a error FW error. 
*/ #define CSR_FUNC_SCRATCH_INIT_VALUE (0x01010101) +#define CSR_FUNC_SCRATCH_POWER_OFF_MASK 0xFFFF /* Bits for CSR_HW_IF_CONFIG_REG */ #define CSR_HW_IF_CONFIG_REG_MSK_MAC_STEP_DASH (0x0000000F) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index cd1b0048bb6daf..c381511e9ec65b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -944,8 +944,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK); break; case NL80211_BAND_6GHZ: - if (!trans->reduced_cap_sku && - trans->bw_limit >= 320) { + if (!trans->reduced_cap_sku) { iftype_data->eht_cap.eht_cap_elem.phy_cap_info[0] |= IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[1] |= @@ -1095,22 +1094,19 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, iftype_data->eht_cap.eht_mcs_nss_supp.bw._320.rx_tx_mcs13_max_nss = 0; } - if (trans->bw_limit < 160) + if (trans->no_160) iftype_data->he_cap.he_cap_elem.phy_cap_info[0] &= ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; - if (trans->bw_limit < 320 || trans->reduced_cap_sku) { + if (trans->reduced_cap_sku) { memset(&iftype_data->eht_cap.eht_mcs_nss_supp.bw._320, 0, sizeof(iftype_data->eht_cap.eht_mcs_nss_supp.bw._320)); - iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= - ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; - } - - if (trans->reduced_cap_sku) { iftype_data->eht_cap.eht_mcs_nss_supp.bw._80.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_mcs_nss_supp.bw._160.rx_tx_mcs13_max_nss = 0; iftype_data->eht_cap.eht_cap_elem.phy_cap_info[8] 
&= ~IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA; + iftype_data->eht_cap.eht_cap_elem.phy_cap_info[2] &= + ~IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK; } } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index c1607b6d0759e0..e7b2e08645efa7 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -21,6 +21,7 @@ struct iwl_trans_dev_restart_data { struct list_head list; unsigned int restart_count; time64_t last_error; + bool backoff; char name[]; }; @@ -125,13 +126,20 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) if (!data) return at_least; - if (ktime_get_boottime_seconds() - data->last_error >= + if (!data->backoff && + ktime_get_boottime_seconds() - data->last_error >= IWL_TRANS_RESET_OK_TIME) data->restart_count = 0; index = data->restart_count; - if (index >= ARRAY_SIZE(escalation_list)) + if (index >= ARRAY_SIZE(escalation_list)) { index = ARRAY_SIZE(escalation_list) - 1; + if (!data->backoff) { + data->backoff = true; + return IWL_RESET_MODE_BACKOFF; + } + data->backoff = false; + } return max(at_least, escalation_list[index]); } @@ -140,7 +148,8 @@ iwl_trans_determine_restart_mode(struct iwl_trans *trans) static void iwl_trans_restart_wk(struct work_struct *wk) { - struct iwl_trans *trans = container_of(wk, typeof(*trans), restart.wk); + struct iwl_trans *trans = container_of(wk, typeof(*trans), + restart.wk.work); struct iwl_trans_reprobe *reprobe; enum iwl_reset_mode mode; @@ -168,6 +177,12 @@ static void iwl_trans_restart_wk(struct work_struct *wk) return; mode = iwl_trans_determine_restart_mode(trans); + if (mode == IWL_RESET_MODE_BACKOFF) { + IWL_ERR(trans, "Too many device errors - delay next reset\n"); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, + IWL_TRANS_RESET_DELAY); + return; + } iwl_trans_inc_restart_count(trans->dev); @@ -227,7 +242,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int 
priv_size, trans->dev = dev; trans->num_rx_queues = 1; - INIT_WORK(&trans->restart.wk, iwl_trans_restart_wk); + INIT_DELAYED_WORK(&trans->restart.wk, iwl_trans_restart_wk); return trans; } @@ -271,7 +286,7 @@ int iwl_trans_init(struct iwl_trans *trans) void iwl_trans_free(struct iwl_trans *trans) { - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); kmem_cache_destroy(trans->dev_cmd_pool); } @@ -403,7 +418,7 @@ void iwl_trans_op_mode_leave(struct iwl_trans *trans) iwl_trans_pcie_op_mode_leave(trans); - cancel_work_sync(&trans->restart.wk); + cancel_delayed_work_sync(&trans->restart.wk); trans->op_mode = NULL; @@ -540,7 +555,6 @@ void __releases(nic_access) iwl_trans_release_nic_access(struct iwl_trans *trans) { iwl_trans_pcie_release_nic_access(trans); - __release(nic_access); } IWL_EXPORT_SYMBOL(iwl_trans_release_nic_access); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 25fb4c50e38b1f..44a249a753ecf6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2023, 2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2023 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -328,6 +328,7 @@ iwl_trans_get_rb_size_order(enum iwl_amsdu_size rb_size) case IWL_AMSDU_4K: return get_order(4 * 1024); case IWL_AMSDU_8K: + return get_order(8 * 1024); case IWL_AMSDU_12K: return get_order(16 * 1024); default: @@ -876,7 +877,7 @@ struct iwl_txq { * only valid for discrete (not integrated) NICs * @invalid_tx_cmd: invalid TX command buffer * @reduced_cap_sku: reduced capability supported SKU - * @bw_limit: the max bandwidth + * @no_160: device not supporting 160 MHz * @step_urm: STEP is in URM, no support for MCS>9 in 320 MHz * 
@restart: restart worker data * @restart.wk: restart worker @@ -911,8 +912,7 @@ struct iwl_trans { char hw_id_str[52]; u32 sku_id[3]; bool reduced_cap_sku; - u16 bw_limit; - bool step_urm; + u8 no_160:1, step_urm:1; u8 dsbr_urm_fw_dependent:1, dsbr_urm_permanent:1; @@ -962,7 +962,7 @@ struct iwl_trans { struct iwl_dma_ptr invalid_tx_cmd; struct { - struct work_struct wk; + struct delayed_work wk; struct iwl_fw_error_dump_mode mode; bool during_reset; } restart; @@ -1163,7 +1163,7 @@ static inline void iwl_trans_schedule_reset(struct iwl_trans *trans, */ trans->restart.during_reset = test_bit(STATUS_IN_SW_RESET, &trans->status); - queue_work(system_unbound_wq, &trans->restart.wk); + queue_delayed_work(system_unbound_wq, &trans->restart.wk, 0); } static inline void iwl_trans_fw_error(struct iwl_trans *trans, @@ -1262,6 +1262,9 @@ enum iwl_reset_mode { IWL_RESET_MODE_RESCAN, IWL_RESET_MODE_FUNC_RESET, IWL_RESET_MODE_PROD_RESET, + + /* keep last - special backoff value */ + IWL_RESET_MODE_BACKOFF, }; void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode); diff --git a/drivers/net/wireless/intel/iwlwifi/mld/agg.c b/drivers/net/wireless/intel/iwlwifi/mld/agg.c index db9e0f04f4b77d..687a9450ac9840 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/agg.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/agg.c @@ -124,9 +124,9 @@ void iwl_mld_handle_bar_frame_release_notif(struct iwl_mld *mld, rcu_read_lock(); baid_data = rcu_dereference(mld->fw_id_to_ba[baid]); - if (!IWL_FW_CHECK(mld, !baid_data, - "Got valid BAID %d but not allocated, invalid BAR release!\n", - baid)) + if (IWL_FW_CHECK(mld, !baid_data, + "Got valid BAID %d but not allocated, invalid BAR release!\n", + baid)) goto out_unlock; if (IWL_FW_CHECK(mld, tid != baid_data->tid || diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 5a7207accd8677..ee99298eebf595 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ 
b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1099,7 +1099,8 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, if (!match) return; - netdetect_info->matches[netdetect_info->n_matches++] = match; + netdetect_info->matches[netdetect_info->n_matches] = match; + netdetect_info->n_matches++; /* We inverted the order of the SSIDs in the scan * request, so invert the index here. @@ -1116,9 +1117,11 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, for_each_set_bit(j, (unsigned long *)&matches[i].matching_channels[0], - sizeof(matches[i].matching_channels)) - match->channels[match->n_channels++] = + sizeof(matches[i].matching_channels)) { + match->channels[match->n_channels] = netdetect_cfg->channels[j]->center_freq; + match->n_channels++; + } } } @@ -1895,7 +1898,6 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) int link_id; int ret; bool fw_err = false; - bool keep_connection; lockdep_assert_wiphy(mld->wiphy); @@ -1965,7 +1967,7 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) iwl_mld_process_netdetect_res(mld, bss_vif, &resume_data); mld->netdetect = false; } else { - keep_connection = + bool keep_connection = iwl_mld_process_wowlan_status(mld, bss_vif, resume_data.wowlan_status); @@ -1973,11 +1975,10 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) if (keep_connection) iwl_mld_unblock_emlsr(mld, bss_vif, IWL_MLD_EMLSR_BLOCKED_WOWLAN); + else + ieee80211_resume_disconnect(bss_vif); } - if (!mld->netdetect && !keep_connection) - ieee80211_resume_disconnect(bss_vif); - goto out; err: diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 453ce2ba39d1f9..93f9f78e4276bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -396,8 +396,8 @@ static ssize_t iwl_dbgfs_tas_get_status_read(struct iwl_mld *mld, char *buf, .data[0] = &cmd, }; struct iwl_dhc_tas_status_resp *resp = NULL; + u32 resp_len = 0; ssize_t pos = 0; - u32 resp_len; u32 
status; int ret; @@ -949,8 +949,9 @@ void iwl_mld_add_vif_debugfs(struct ieee80211_hw *hw, snprintf(name, sizeof(name), "%pd", vif->debugfs_dir); snprintf(target, sizeof(target), "../../../%pd3/iwlmld", vif->debugfs_dir); - mld_vif->dbgfs_slink = - debugfs_create_symlink(name, mld->debugfs_dir, target); + if (!mld_vif->dbgfs_slink) + mld_vif->dbgfs_slink = + debugfs_create_symlink(name, mld->debugfs_dir, target); if (iwlmld_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && vif->type == NL80211_IFTYPE_STATION) { diff --git a/drivers/net/wireless/intel/iwlwifi/mld/fw.c b/drivers/net/wireless/intel/iwlwifi/mld/fw.c index 62da137e102479..4b083d447ee2f7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/fw.c @@ -333,19 +333,22 @@ int iwl_mld_load_fw(struct iwl_mld *mld) ret = iwl_trans_start_hw(mld->trans); if (ret) - return ret; + goto err; ret = iwl_mld_run_fw_init_sequence(mld); if (ret) - return ret; + goto err; ret = iwl_mld_init_mcc(mld); if (ret) - return ret; + goto err; mld->fw_status.running = true; return 0; +err: + iwl_mld_stop_fw(mld); + return ret; } void iwl_mld_stop_fw(struct iwl_mld *mld) @@ -358,6 +361,10 @@ void iwl_mld_stop_fw(struct iwl_mld *mld) iwl_trans_stop_device(mld->trans); + wiphy_work_cancel(mld->wiphy, &mld->async_handlers_wk); + + iwl_mld_purge_async_handlers_list(mld); + mld->fw_status.running = false; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index d1d56b081bf634..ec14d0736cee6e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -166,7 +166,7 @@ struct iwl_mld_vif { struct iwl_mld_emlsr emlsr; -#if CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct iwl_mld_wowlan_data wowlan_data; #endif #ifdef CONFIG_IWLWIFI_DEBUGFS diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 
6851064b82da18..68d97d3b8f0260 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -475,8 +475,8 @@ static int iwl_mld_mac80211_start(struct ieee80211_hw *hw) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); - int ret; bool in_d3 = false; + int ret = 0; lockdep_assert_wiphy(mld->wiphy); @@ -537,7 +537,8 @@ void iwl_mld_mac80211_stop(struct ieee80211_hw *hw, bool suspend) /* if the suspend flow fails the fw is in error. Stop it here, and it * will be started upon wakeup */ - if (!suspend || iwl_mld_no_wowlan_suspend(mld)) + if (!suspend || + (IS_ENABLED(CONFIG_PM_SLEEP) && iwl_mld_no_wowlan_suspend(mld))) iwl_mld_stop_fw(mld); /* HW is stopped, no more coming RX. OTOH, the worker can't run as the @@ -650,6 +651,7 @@ void iwl_mld_mac80211_remove_interface(struct ieee80211_hw *hw, #ifdef CONFIG_IWLWIFI_DEBUGFS debugfs_remove(iwl_mld_vif_from_mac80211(vif)->dbgfs_slink); + iwl_mld_vif_from_mac80211(vif)->dbgfs_slink = NULL; #endif iwl_mld_rm_vif(mld, vif); @@ -1943,6 +1945,7 @@ static void iwl_mld_sta_rc_update(struct ieee80211_hw *hw, } } +#ifdef CONFIG_PM_SLEEP static void iwl_mld_set_wakeup(struct ieee80211_hw *hw, bool enabled) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); @@ -1994,6 +1997,7 @@ static int iwl_mld_resume(struct ieee80211_hw *hw) return 0; } +#endif static int iwl_mld_alloc_ptk_pn(struct iwl_mld *mld, struct iwl_mld_sta *mld_sta, diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index d4a99ae64074f1..7a098942dc8021 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -75,6 +75,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); @@ -414,9 +415,14 @@ 
iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, wiphy_unlock(mld->wiphy); rtnl_unlock(); iwl_fw_flush_dumps(&mld->fwrt); - goto free_hw; + goto err; } + /* We are about to stop the FW. Notifications may require an + * operational FW, so handle them all here before we stop. + */ + wiphy_work_flush(mld->wiphy, &mld->async_handlers_wk); + iwl_mld_stop_fw(mld); wiphy_unlock(mld->wiphy); @@ -455,7 +461,8 @@ iwl_op_mode_mld_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, iwl_mld_leds_exit(mld); free_nvm: kfree(mld->nvm_data); -free_hw: +err: + iwl_trans_op_mode_leave(mld->trans); ieee80211_free_hw(mld->hw); return ERR_PTR(ret); } @@ -631,7 +638,8 @@ iwl_mld_nic_error(struct iwl_op_mode *op_mode, * It might not actually be true that we'll restart, but the * setting doesn't matter if we're going to be unbound either. */ - if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT) + if (type != IWL_ERR_TYPE_RESET_HS_TIMEOUT && + mld->fw_status.running) mld->fw_status.in_hw_restart = true; } diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.h b/drivers/net/wireless/intel/iwlwifi/mld/mld.h index 5eceaaf7696db6..a4a16da6ebf3fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.h @@ -298,11 +298,6 @@ iwl_cleanup_mld(struct iwl_mld *mld) #endif iwl_mld_low_latency_restart_cleanup(mld); - - /* Empty the list of async notification handlers so we won't process - * notifications from the dead fw after the reconfig flow. - */ - iwl_mld_purge_async_handlers_list(mld); } enum iwl_power_scheme { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 068c58e9c1eb4e..c2729dab8e79e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2,6 +2,7 @@ /****************************************************************************** * * Copyright(c) 2005 - 2014, 2018 - 2023 Intel Corporation. 
All rights reserved. + * Copyright(c) 2025 Intel Corporation * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH *****************************************************************************/ @@ -2709,6 +2710,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, optimal_rate); iwl_mvm_hwrate_to_tx_rate_v1(last_ucode_rate, info->band, &txrc->reported_rate); + txrc->reported_rate.count = 1; } spin_unlock_bh(&lq_sta->pers.lock); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 93446c37400814..00056e76ea3dd6 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2025 Intel Corporation + * Copyright (C) 2005-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -552,17 +552,16 @@ MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids); EXPORT_SYMBOL_IF_IWLWIFI_KUNIT(iwl_hw_card_ids); #define _IWL_DEV_INFO(_device, _subdevice, _mac_type, _mac_step, _rf_type, \ - _rf_id, _rf_step, _bw_limit, _cores, _cdb, _cfg, _name) \ + _rf_id, _rf_step, _no_160, _cores, _cdb, _cfg, _name) \ { .device = (_device), .subdevice = (_subdevice), .cfg = &(_cfg), \ .name = _name, .mac_type = _mac_type, .rf_type = _rf_type, .rf_step = _rf_step, \ - .bw_limit = _bw_limit, .cores = _cores, .rf_id = _rf_id, \ + .no_160 = _no_160, .cores = _cores, .rf_id = _rf_id, \ .mac_step = _mac_step, .cdb = _cdb, .jacket = IWL_CFG_ANY } #define IWL_DEV_INFO(_device, _subdevice, _cfg, _name) \ _IWL_DEV_INFO(_device, _subdevice, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, \ - _cfg, _name) + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, \ + 
IWL_CFG_ANY, _cfg, _name) VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { #if IS_ENABLED(CONFIG_IWLMVM) @@ -589,6 +588,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7F70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7F70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma, iwl_ax411_killer_1690s_name), @@ -725,66 +726,66 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, 
IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_2ac_cfg_soc, iwl9560_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT_GNSS, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9270_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_160_name), _IWL_DEV_INFO(0x271B, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9162_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_160_name), _IWL_DEV_INFO(0x2526, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_TH, IWL_CFG_ANY, IWL_CFG_RF_TYPE_TH, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9260_2ac_cfg, iwl9260_name), /* Qu with 
Jf */ @@ -792,132 +793,132 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, 
iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_b0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QU, 
SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_qu_c0_jf_b0_cfg, iwl9560_killer_1550i_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9462_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, 
IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1551, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550s_name), _IWL_DEV_INFO(IWL_CFG_ANY, 0x1552, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwl9560_quz_a0_jf_b0_cfg, iwl9560_killer_1550i_name), /* Qu with Hr */ @@ -925,189 +926,189 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_b0_hr_b0, iwl_ax203_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, 
IWL_CFG_NO_CDB, iwl_qu_c0_hr_b0, iwl_ax201_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_quz_a0_hr1_b0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_quz_a0_hr_b0, iwl_ax201_name), /* Ma */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_ma, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_ma, iwl_ax231_name), /* So with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, 
iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Hr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax203_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, - 80, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax101_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_so_a0_hr_a0, iwl_ax201_name), /* So-F with Gf */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* SoF with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, 
IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* SoF with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), /* So with GF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_CDB, + IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_CDB, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_name), /* So with JF2 */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, 
IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), /* So with JF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, IWL_CFG_ANY, - 80, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), #endif /* CONFIG_IWLMVM */ @@ -1116,13 +1117,13 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, 
IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, @@ -1134,119 +1135,104 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax201_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BZ_W, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_bz, iwl_wh_name), /* Ga (Gl) */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_gl_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_NO_CDB, + IWL_CFG_NO_320, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_gl, iwl_mtp_name), /* Sc */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, 
IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2, iwl_sp_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_ax211_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY, IWL_CFG_ANY, - 
IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_fm_name), _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_NO_LIM, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_sc2f, iwl_wh_name), - _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_MAC_TYPE_SC2F, IWL_CFG_ANY, - IWL_CFG_RF_TYPE_WH, IWL_CFG_ANY, IWL_CFG_ANY, - 160, IWL_CFG_ANY, IWL_CFG_ANY, - iwl_cfg_sc2f, iwl_sp_name), /* Dr */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_DR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_dr, iwl_dr_name), /* Br */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_BR, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, - IWL_CFG_BW_ANY, IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_br, iwl_br_name), #endif /* CONFIG_IWLMLD */ }; @@ -1398,7 +1384,7 @@ static int map_crf_id(struct iwl_trans *iwl_trans) VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info * iwl_pci_find_dev_info(u16 device, u16 subsystem_device, u16 mac_type, u8 mac_step, u16 rf_type, u8 cdb, - u8 jacket, u8 rf_id, u8 bw_limit, u8 cores, u8 rf_step) + u8 jacket, u8 rf_id, u8 no_160, u8 cores, u8 rf_step) { int num_devices = ARRAY_SIZE(iwl_dev_info_table); int i; @@ -1441,15 +1427,8 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device, dev_info->rf_id != rf_id) continue; - /* - * Check that bw_limit have the same "boolean" value since - * IWL_SUBDEVICE_BW_LIM can only return a boolean value and - * dev_info->bw_limit encodes a non-boolean value. - * dev_info->bw_limit == IWL_CFG_BW_NO_LIM must be equal to - * !bw_limit to have a match. 
- */ - if (dev_info->bw_limit != IWL_CFG_BW_ANY && - (dev_info->bw_limit == IWL_CFG_BW_NO_LIM) == !!bw_limit) + if (dev_info->no_160 != (u8)IWL_CFG_ANY && + dev_info->no_160 != no_160) continue; if (dev_info->cores != (u8)IWL_CFG_ANY && @@ -1587,13 +1566,13 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) CSR_HW_RFID_IS_CDB(iwl_trans->hw_rf_id), CSR_HW_RFID_IS_JACKET(iwl_trans->hw_rf_id), IWL_SUBDEVICE_RF_ID(pdev->subsystem_device), - IWL_SUBDEVICE_BW_LIM(pdev->subsystem_device), + IWL_SUBDEVICE_NO_160(pdev->subsystem_device), IWL_SUBDEVICE_CORES(pdev->subsystem_device), CSR_HW_RFID_STEP(iwl_trans->hw_rf_id)); if (dev_info) { iwl_trans->cfg = dev_info->cfg; iwl_trans->name = dev_info->name; - iwl_trans->bw_limit = dev_info->bw_limit; + iwl_trans->no_160 = dev_info->no_160 == IWL_CFG_NO_160; } #if IS_ENABLED(CONFIG_IWLMVM) @@ -1759,11 +1738,27 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. + * For older devices, just try silently to grab the NIC. */ - if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. 
+ */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 45460f93d24add..114a9195ad7f74 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -558,10 +558,10 @@ void iwl_trans_pcie_free(struct iwl_trans *trans); void iwl_trans_pcie_free_pnvm_dram_regions(struct iwl_dram_regions *dram_regions, struct device *dev); -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -#define _iwl_trans_pcie_grab_nic_access(trans) \ +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent); +#define _iwl_trans_pcie_grab_nic_access(trans, silent) \ __cond_lock(nic_access_nobh, \ - likely(__iwl_trans_pcie_grab_nic_access(trans))) + likely(__iwl_trans_pcie_grab_nic_access(trans, silent))) void iwl_trans_pcie_check_product_reset_status(struct pci_dev *pdev); void iwl_trans_pcie_check_product_reset_mode(struct pci_dev *pdev); @@ -1105,7 +1105,8 @@ void iwl_trans_pcie_set_bits_mask(struct iwl_trans *trans, u32 reg, int iwl_trans_pcie_read_config32(struct iwl_trans *trans, u32 ofs, u32 *val); bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans); -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans); /* transport gen 1 exported functions */ void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index 3ece34e30d5806..472f26f83ba833 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ 
b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -147,8 +147,14 @@ static void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans) return; if (trans->state >= IWL_TRANS_FW_STARTED && - trans_pcie->fw_reset_handshake) + trans_pcie->fw_reset_handshake) { + /* + * Reset handshake can dump firmware on timeout, but that + * should assume that the firmware is already dead. + */ + trans->state = IWL_TRANS_NO_FW; iwl_trans_pcie_fw_reset_handshake(trans); + } trans_pcie->is_down = true; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index c917ed4c19bcc3..102a6123bba0e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2351,7 +2351,8 @@ void iwl_trans_pcie_reset(struct iwl_trans *trans, enum iwl_reset_mode mode) struct iwl_trans_pcie_removal *removal; char _msg = 0, *msg = &_msg; - if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY)) + if (WARN_ON(mode < IWL_RESET_MODE_REMOVE_ONLY || + mode == IWL_RESET_MODE_BACKOFF)) return; if (test_bit(STATUS_TRANS_DEAD, &trans->status)) @@ -2405,7 +2406,7 @@ EXPORT_SYMBOL(iwl_trans_pcie_reset); * This version doesn't disable BHs but rather assumes they're * already disabled. 
*/ -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) +bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) { int ret; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2457,6 +2458,11 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) if (unlikely(ret < 0)) { u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL); + if (silent) { + spin_unlock(&trans_pcie->reg_lock); + return false; + } + WARN_ONCE(1, "Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n", cntrl); @@ -2488,7 +2494,7 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) bool ret; local_bh_disable(); - ret = __iwl_trans_pcie_grab_nic_access(trans); + ret = __iwl_trans_pcie_grab_nic_access(trans, false); if (ret) { /* keep BHs disabled until iwl_trans_pcie_release_nic_access */ return ret; @@ -2497,7 +2503,8 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) return false; } -void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) +void __releases(nic_access_nobh) +iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2524,6 +2531,7 @@ void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans) * scheduled on different CPUs (after we drop reg_lock). */ out: + __release(nic_access_nobh); spin_unlock_bh(&trans_pcie->reg_lock); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index bb90bcfc676399..9fc4e98693ebf8 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1021,7 +1021,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans, * returned. This needs to be done only on NICs that have * apmg_wake_up_wa set (see above.) 
*/ - if (!_iwl_trans_pcie_grab_nic_access(trans)) + if (!_iwl_trans_pcie_grab_nic_access(trans, false)) return -EIO; /* diff --git a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c index 7ef5e89c6af27c..d0bda23c628aa6 100644 --- a/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c +++ b/drivers/net/wireless/intel/iwlwifi/tests/devinfo.c @@ -2,7 +2,7 @@ /* * KUnit tests for the iwlwifi device info table * - * Copyright (C) 2023-2025 Intel Corporation + * Copyright (C) 2023-2024 Intel Corporation */ #include #include @@ -13,9 +13,9 @@ MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); static void iwl_pci_print_dev_info(const char *pfx, const struct iwl_dev_info *di) { - printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,bw_limit=%d,cores=%.2x\n", + printk(KERN_DEBUG "%sdev=%.4x,subdev=%.4x,mac_type=%.4x,mac_step=%.4x,rf_type=%.4x,cdb=%d,jacket=%d,rf_id=%.2x,no_160=%d,cores=%.2x\n", pfx, di->device, di->subdevice, di->mac_type, di->mac_step, - di->rf_type, di->cdb, di->jacket, di->rf_id, di->bw_limit, + di->rf_type, di->cdb, di->jacket, di->rf_id, di->no_160, di->cores); } @@ -31,13 +31,8 @@ static void devinfo_table_order(struct kunit *test) di->mac_type, di->mac_step, di->rf_type, di->cdb, di->jacket, di->rf_id, - di->bw_limit != IWL_CFG_BW_NO_LIM, - di->cores, di->rf_step); - if (!ret) { - iwl_pci_print_dev_info("No entry found for: ", di); - KUNIT_FAIL(test, - "No entry found for entry at index %d\n", idx); - } else if (ret != di) { + di->no_160, di->cores, di->rf_step); + if (ret != di) { iwl_pci_print_dev_info("searched: ", di); iwl_pci_print_dev_info("found: ", ret); KUNIT_FAIL(test, diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 738bafc3749b0a..66f0f5377ac181 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -403,14 +403,12 @@ 
mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && bss_desc->bcn_ht_oper->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) { - chan_list->chan_scan_param[0].radio_type |= - CHAN_BW_40MHZ << 2; + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) SET_SECONDARYCHAN(chan_list->chan_scan_param[0]. radio_type, (bss_desc->bcn_ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET)); - } + *buffer += struct_size(chan_list, chan_scan_param, 1); ret_len += struct_size(chan_list, chan_scan_param, 1); } diff --git a/drivers/net/wireless/mediatek/mt76/channel.c b/drivers/net/wireless/mediatek/mt76/channel.c index e7b839e7429034..cc2d888e3f17a5 100644 --- a/drivers/net/wireless/mediatek/mt76/channel.c +++ b/drivers/net/wireless/mediatek/mt76/channel.c @@ -302,11 +302,13 @@ void mt76_put_vif_phy_link(struct mt76_phy *phy, struct ieee80211_vif *vif, struct mt76_vif_link *mlink) { struct mt76_dev *dev = phy->dev; - struct mt76_vif_data *mvif = mlink->mvif; + struct mt76_vif_data *mvif; if (IS_ERR_OR_NULL(mlink) || !mlink->offchannel) return; + mvif = mlink->mvif; + rcu_assign_pointer(mvif->offchannel_link, NULL); dev->drv->vif_link_remove(phy, vif, &vif->bss_conf, mlink); kfree(mlink); diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 844af16ee55131..35b4ec91979e6a 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -1011,6 +1011,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev) int i; mt76_worker_disable(&dev->tx_worker); + napi_disable(&dev->tx_napi); netif_napi_del(&dev->tx_napi); for (i = 0; i < ARRAY_SIZE(dev->phys); i++) { diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index b88d7e10742ee6..e9605dc222910f 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -449,8 +449,10 @@ mt76_phy_init(struct mt76_phy 
*phy, struct ieee80211_hw *hw) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_AQL); - wiphy->available_antennas_tx = phy->antenna_mask; - wiphy->available_antennas_rx = phy->antenna_mask; + if (!wiphy->available_antennas_tx) + wiphy->available_antennas_tx = phy->antenna_mask; + if (!wiphy->available_antennas_rx) + wiphy->available_antennas_rx = phy->antenna_mask; wiphy->sar_capa = &mt76_sar_capa; phy->frp = devm_kcalloc(dev->dev, wiphy->sar_capa->num_freq_ranges, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index 876f0692850a2e..9c4d5cea0c42e9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -651,6 +651,9 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.base = devm_ioremap(dev->mt76.dev, pci_resource_start(pci_dev, 0), pci_resource_len(pci_dev, 0)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = pci_resource_start(pci_dev, 0); wed->wlan.wpdma_int = pci_resource_start(pci_dev, 0) + MT_INT_WED_SOURCE_CSR; @@ -678,6 +681,9 @@ int mt7915_mmio_wed_init(struct mt7915_dev *dev, void *pdev_ptr, wed->wlan.bus_type = MTK_WED_BUS_AXI; wed->wlan.base = devm_ioremap(dev->mt76.dev, res->start, resource_size(res)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = res->start; wed->wlan.wpdma_int = res->start + MT_INT_SOURCE_CSR; wed->wlan.wpdma_mask = res->start + MT_INT_MASK_CSR; diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/init.c b/drivers/net/wireless/mediatek/mt76/mt7925/init.c index 63cb08f4d87cc4..79639be0d29aca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/init.c @@ -89,7 +89,7 @@ void mt7925_regd_be_ctrl(struct mt792x_dev *dev, u8 *alpha2) } /* Check the last one */ - if (rule->flag && BIT(0)) + if (rule->flag & BIT(0)) break; pos += 
sizeof(*rule); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c index e61da76b2097bf..dea5b9bcb3fdfb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/mcu.c @@ -783,7 +783,7 @@ int mt7925_mcu_fw_log_2_host(struct mt792x_dev *dev, u8 ctrl) int ret; ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_UNI_CMD(WSYS_CONFIG), - &req, sizeof(req), false, NULL); + &req, sizeof(req), true, NULL); return ret; } @@ -1424,7 +1424,7 @@ int mt7925_mcu_set_eeprom(struct mt792x_dev *dev) }; return mt76_mcu_send_and_get_msg(&dev->mt76, MCU_UNI_CMD(EFUSE_CTRL), - &req, sizeof(req), false, NULL); + &req, sizeof(req), true, NULL); } EXPORT_SYMBOL_GPL(mt7925_mcu_set_eeprom); @@ -1924,14 +1924,14 @@ mt7925_mcu_sta_cmd(struct mt76_phy *phy, mt7925_mcu_sta_mld_tlv(skb, info->vif, info->link_sta->sta); mt7925_mcu_sta_eht_mld_tlv(skb, info->vif, info->link_sta->sta); } - - mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } if (!info->enable) { mt7925_mcu_sta_remove_tlv(skb); mt76_connac_mcu_add_tlv(skb, STA_REC_MLD_OFF, sizeof(struct tlv)); + } else { + mt7925_mcu_sta_hdr_trans_tlv(skb, info->vif, info->link_sta); } return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true); @@ -2046,8 +2046,6 @@ int mt7925_mcu_set_sniffer(struct mt792x_dev *dev, struct ieee80211_vif *vif, }, }; - mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD(SNIFFER), &req, sizeof(req), true); - return mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD(SNIFFER), &req, sizeof(req), true); } @@ -2764,7 +2762,7 @@ int mt7925_mcu_set_dbdc(struct mt76_phy *phy, bool enable) conf->band = 0; /* unused */ err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SET_DBDC_PARMS), - false); + true); return err; } @@ -2790,6 +2788,9 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, struct tlv *tlv; int max_len; + if (test_bit(MT76_HW_SCANNING, &phy->state)) + return -EBUSY; + max_len = sizeof(*hdr) + 
sizeof(*req) + sizeof(*ssid) + sizeof(*bssid) + sizeof(*chan_info) + sizeof(*misc) + sizeof(*ie); @@ -2869,7 +2870,7 @@ int mt7925_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif, } err = mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); if (err < 0) clear_bit(MT76_HW_SCANNING, &phy->state); @@ -2975,7 +2976,7 @@ int mt7925_mcu_sched_scan_req(struct mt76_phy *phy, } return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); } EXPORT_SYMBOL_GPL(mt7925_mcu_sched_scan_req); @@ -3011,7 +3012,7 @@ mt7925_mcu_sched_scan_enable(struct mt76_phy *phy, clear_bit(MT76_HW_SCHED_SCANNING, &phy->state); return mt76_mcu_skb_send_msg(mdev, skb, MCU_UNI_CMD(SCAN_REQ), - false); + true); } int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, @@ -3050,7 +3051,7 @@ int mt7925_mcu_cancel_hw_scan(struct mt76_phy *phy, } return mt76_mcu_send_msg(phy->dev, MCU_UNI_CMD(SCAN_REQ), - &req, sizeof(req), false); + &req, sizeof(req), true); } EXPORT_SYMBOL_GPL(mt7925_mcu_cancel_hw_scan); @@ -3155,7 +3156,7 @@ int mt7925_mcu_set_channel_domain(struct mt76_phy *phy) memcpy(__skb_push(skb, sizeof(req)), &req, sizeof(req)); return mt76_mcu_skb_send_msg(dev, skb, MCU_UNI_CMD(SET_DOMAIN_INFO), - false); + true); } EXPORT_SYMBOL_GPL(mt7925_mcu_set_channel_domain); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c index 69a7d9b2e38bd7..4b68d2fc5e0949 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/dma.c @@ -493,7 +493,7 @@ int mt7996_dma_init(struct mt7996_dev *dev) ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU], MT_RXQ_ID(MT_RXQ_MCU), MT7996_RX_MCU_RING_SIZE, - MT_RX_BUF_SIZE, + MT7996_RX_MCU_BUF_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MCU)); if (ret) return ret; @@ -502,7 +502,7 @@ int mt7996_dma_init(struct mt7996_dev *dev) ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA], MT_RXQ_ID(MT_RXQ_MCU_WA), 
MT7996_RX_MCU_RING_SIZE_WA, - MT_RX_BUF_SIZE, + MT7996_RX_MCU_BUF_SIZE, MT_RXQ_RING_BASE(MT_RXQ_MCU_WA)); if (ret) return ret; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c index 53dfac02f8af0b..f0c76aac175dff 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/eeprom.c @@ -304,6 +304,7 @@ int mt7996_eeprom_parse_hw_cap(struct mt7996_dev *dev, struct mt7996_phy *phy) phy->has_aux_rx = true; mphy->antenna_mask = BIT(nss) - 1; + phy->orig_antenna_mask = mphy->antenna_mask; mphy->chainmask = (BIT(path) - 1) << dev->chainshift[band_idx]; phy->orig_chainmask = mphy->chainmask; dev->chainmask |= mphy->chainmask; diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/init.c b/drivers/net/wireless/mediatek/mt76/mt7996/init.c index 6b660424aedc31..4906b0ecc73e02 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/init.c @@ -217,6 +217,9 @@ static int mt7996_thermal_init(struct mt7996_phy *phy) name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7996_%s.%d", wiphy_name(wiphy), phy->mt76->band_idx); + if (!name) + return -ENOMEM; + snprintf(cname, sizeof(cname), "cooling_device%d", phy->mt76->band_idx); cdev = thermal_cooling_device_register(name, phy, &mt7996_thermal_ops); @@ -1113,12 +1116,12 @@ mt7996_set_stream_he_txbf_caps(struct mt7996_phy *phy, c = IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE; - if (is_mt7996(phy->mt76->dev)) - c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 | - (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 * non_2g); - else + if (is_mt7992(phy->mt76->dev)) c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5 | (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5 * non_2g); + else + c |= IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 | + (IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 * non_2g); elem->phy_cap_info[4] |= c; @@ 
-1318,6 +1321,9 @@ mt7996_init_eht_caps(struct mt7996_phy *phy, enum nl80211_band band, u8_encode_bits(IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454, IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK); + eht_cap_elem->mac_cap_info[1] |= + IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK; + eht_cap_elem->phy_cap_info[0] = IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI | IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER | diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 91c64e3a0860ff..a3295b22523a61 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -68,11 +68,13 @@ static int mt7996_start(struct ieee80211_hw *hw) static void mt7996_stop_phy(struct mt7996_phy *phy) { - struct mt7996_dev *dev = phy->dev; + struct mt7996_dev *dev; if (!phy || !test_bit(MT76_STATE_RUNNING, &phy->mt76->state)) return; + dev = phy->dev; + cancel_delayed_work_sync(&phy->mt76->mac_work); mutex_lock(&dev->mt76.mutex); @@ -414,11 +416,13 @@ static void mt7996_phy_set_rxfilter(struct mt7996_phy *phy) static void mt7996_set_monitor(struct mt7996_phy *phy, bool enabled) { - struct mt7996_dev *dev = phy->dev; + struct mt7996_dev *dev; if (!phy) return; + dev = phy->dev; + if (enabled == !(phy->rxfilter & MT_WF_RFCR_DROP_OTHER_UC)) return; @@ -998,16 +1002,22 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif, continue; link_conf = link_conf_dereference_protected(vif, link_id); - if (!link_conf) + if (!link_conf) { + err = -EINVAL; goto error_unlink; + } link = mt7996_vif_link(dev, vif, link_id); - if (!link) + if (!link) { + err = -EINVAL; goto error_unlink; + } link_sta = link_sta_dereference_protected(sta, link_id); - if (!link_sta) + if (!link_sta) { + err = -EINVAL; goto error_unlink; + } err = mt7996_mac_sta_init_link(dev, link_conf, link_sta, link, link_id); @@ -1518,7 +1528,8 @@ mt7996_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant) u8 shift = 
dev->chainshift[band_idx]; phy->mt76->chainmask = tx_ant & phy->orig_chainmask; - phy->mt76->antenna_mask = phy->mt76->chainmask >> shift; + phy->mt76->antenna_mask = (phy->mt76->chainmask >> shift) & + phy->orig_antenna_mask; mt76_set_stream_caps(phy->mt76, true); mt7996_set_stream_vht_txbf_caps(phy); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c index 13b188e281bdb9..af9169030bad99 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c @@ -323,6 +323,9 @@ int mt7996_mmio_wed_init(struct mt7996_dev *dev, void *pdev_ptr, wed->wlan.base = devm_ioremap(dev->mt76.dev, pci_resource_start(pci_dev, 0), pci_resource_len(pci_dev, 0)); + if (!wed->wlan.base) + return -ENOMEM; + wed->wlan.phy_base = pci_resource_start(pci_dev, 0); if (hif2) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h index 43e646ed6094cb..77605403b39661 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mt7996.h @@ -29,6 +29,9 @@ #define MT7996_RX_RING_SIZE 1536 #define MT7996_RX_MCU_RING_SIZE 512 #define MT7996_RX_MCU_RING_SIZE_WA 1024 +/* scatter-gather of mcu event is not supported in connac3 */ +#define MT7996_RX_MCU_BUF_SIZE (2048 + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MT7996_FIRMWARE_WA "mediatek/mt7996/mt7996_wa.bin" #define MT7996_FIRMWARE_WM "mediatek/mt7996/mt7996_wm.bin" @@ -293,6 +296,7 @@ struct mt7996_phy { struct mt76_channel_state state_ts; u16 orig_chainmask; + u16 orig_antenna_mask; bool has_aux_rx; bool counter_reset; diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index eae93efa615044..82d1bf7edba20d 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -102,7 +102,6 @@ int plfxlc_mac_init_hw(struct 
ieee80211_hw *hw) void plfxlc_mac_release(struct plfxlc_mac *mac) { plfxlc_chip_release(&mac->chip); - lockdep_assert_held(&mac->lock); } int plfxlc_op_start(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index c929db1e53ca63..64904278ddad7d 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -309,7 +309,7 @@ static void rtw_coex_tdma_timer_base(struct rtw_dev *rtwdev, u8 type) { struct rtw_coex *coex = &rtwdev->coex; struct rtw_coex_stat *coex_stat = &coex->stat; - u8 para[2] = {0}; + u8 para[6] = {}; u8 times; u16 tbtt_interval = coex_stat->wl_beacon_interval; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 5e53e0db177efe..8937a7b656edb1 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -3951,7 +3951,8 @@ static void rtw8822c_dpk_cal_coef1(struct rtw_dev *rtwdev) rtw_write32(rtwdev, REG_NCTL0, 0x00001148); rtw_write32(rtwdev, REG_NCTL0, 0x00001149); - check_hw_ready(rtwdev, 0x2d9c, MASKBYTE0, 0x55); + if (!check_hw_ready(rtwdev, 0x2d9c, MASKBYTE0, 0x55)) + rtw_warn(rtwdev, "DPK stuck, performance may be suboptimal"); rtw_write8(rtwdev, 0x1b10, 0x0); rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SUBPAGE, 0x0000000c); diff --git a/drivers/net/wireless/realtek/rtw88/sdio.c b/drivers/net/wireless/realtek/rtw88/sdio.c index 6209a49312f176..410f637b1add58 100644 --- a/drivers/net/wireless/realtek/rtw88/sdio.c +++ b/drivers/net/wireless/realtek/rtw88/sdio.c @@ -718,10 +718,7 @@ static u8 rtw_sdio_get_tx_qsel(struct rtw_dev *rtwdev, struct sk_buff *skb, case RTW_TX_QUEUE_H2C: return TX_DESC_QSEL_H2C; case RTW_TX_QUEUE_MGMT: - if (rtw_chip_wcpu_11n(rtwdev)) - return TX_DESC_QSEL_HIGH; - else - return TX_DESC_QSEL_MGMT; + return TX_DESC_QSEL_MGMT; case RTW_TX_QUEUE_HI0: return TX_DESC_QSEL_HIGH; default: @@ -1227,10 +1224,7 
@@ static void rtw_sdio_process_tx_queue(struct rtw_dev *rtwdev, return; } - if (queue <= RTW_TX_QUEUE_VO) - rtw_sdio_indicate_tx_status(rtwdev, skb); - else - dev_kfree_skb_any(skb); + rtw_sdio_indicate_tx_status(rtwdev, skb); } static void rtw_sdio_tx_handler(struct work_struct *work) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 8643b17866f897..6c52b0425f2ea9 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -5477,7 +5477,7 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, return 0; } -#define RTW89_SCAN_DELAY_TSF_UNIT 104800 +#define RTW89_SCAN_DELAY_TSF_UNIT 1000000 int rtw89_fw_h2c_scan_offload_ax(struct rtw89_dev *rtwdev, struct rtw89_scan_option *option, struct rtw89_vif_link *rtwvif_link, diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index c2fe5a898dc717..064f6a94010731 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -228,7 +228,7 @@ int rtw89_pci_sync_skb_for_device_and_validate_rx_info(struct rtw89_dev *rtwdev, struct sk_buff *skb) { struct rtw89_pci_rx_info *rx_info = RTW89_PCI_RX_SKB_CB(skb); - int rx_tag_retry = 100; + int rx_tag_retry = 1000; int ret; do { @@ -3105,17 +3105,26 @@ static bool rtw89_pci_is_dac_compatible_bridge(struct rtw89_dev *rtwdev) return false; } -static void rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev) +static int rtw89_pci_cfg_dac(struct rtw89_dev *rtwdev, bool force) { struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv; + struct pci_dev *pdev = rtwpci->pdev; + int ret; + u8 val; - if (!rtwpci->enable_dac) - return; + if (!rtwpci->enable_dac && !force) + return 0; if (!rtw89_pci_chip_is_manual_dac(rtwdev)) - return; + return 0; - rtw89_pci_config_byte_set(rtwdev, RTW89_PCIE_L1_CTRL, RTW89_PCIE_BIT_EN_64BITS); + /* Configure DAC only via PCI config API, not DBI interfaces */ + ret = 
pci_read_config_byte(pdev, RTW89_PCIE_L1_CTRL, &val); + if (ret) + return ret; + + val |= RTW89_PCIE_BIT_EN_64BITS; + return pci_write_config_byte(pdev, RTW89_PCIE_L1_CTRL, val); } static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, @@ -3133,13 +3142,16 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, } if (!rtw89_pci_is_dac_compatible_bridge(rtwdev)) - goto no_dac; + goto try_dac_done; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); if (!ret) { - rtwpci->enable_dac = true; - rtw89_pci_cfg_dac(rtwdev); - } else { + ret = rtw89_pci_cfg_dac(rtwdev, true); + if (!ret) { + rtwpci->enable_dac = true; + goto try_dac_done; + } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { rtw89_err(rtwdev, @@ -3147,7 +3159,7 @@ static int rtw89_pci_setup_mapping(struct rtw89_dev *rtwdev, goto err_release_regions; } } -no_dac: +try_dac_done: resource_len = pci_resource_len(pdev, bar_id); rtwpci->mmap = pci_iomap(pdev, bar_id, resource_len); @@ -4302,7 +4314,7 @@ static void rtw89_pci_l2_hci_ldo(struct rtw89_dev *rtwdev) void rtw89_pci_basic_cfg(struct rtw89_dev *rtwdev, bool resume) { if (resume) - rtw89_pci_cfg_dac(rtwdev); + rtw89_pci_cfg_dac(rtwdev, false); rtw89_pci_disable_eq(rtwdev); rtw89_pci_filter_out(rtwdev); diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 474b603c121cba..adb4840b048932 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -342,8 +342,10 @@ void wl1251_tx_work(struct work_struct *work) while ((skb = skb_dequeue(&wl->tx_queue))) { if (!woken_up) { ret = wl1251_ps_elp_wakeup(wl); - if (ret < 0) + if (ret < 0) { + skb_queue_head(&wl->tx_queue, skb); goto out; + } woken_up = true; } diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 8755c5e6a65b30..c814fbd756a1e7 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -550,8 +550,8 @@ static int 
mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id, struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); struct mhi_mbim_context *mbim = ctxt; - link->session = if_id; link->mbim = mbim; + link->session = mhi_mbim_get_link_mux_id(link->mbim->mdev->mhi_cntrl) + if_id; link->ndev = ndev; u64_stats_init(&link->rx_syncp); u64_stats_init(&link->tx_syncp); @@ -607,7 +607,7 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id { struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; struct mhi_mbim_context *mbim; - int err, link_id; + int err; mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL); if (!mbim) @@ -628,11 +628,8 @@ static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id /* Number of transfer descriptors determines size of the queue */ mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); - /* Get the corresponding mux_id from mhi */ - link_id = mhi_mbim_get_link_mux_id(cntrl); - /* Register wwan link ops with MHI controller representing WWAN instance */ - return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, link_id); + return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0); } static void mhi_mbim_remove(struct mhi_device *mhi_dev) diff --git a/drivers/net/wwan/t7xx/t7xx_netdev.c b/drivers/net/wwan/t7xx/t7xx_netdev.c index 91fa082e9cab80..fc0a7cb181df2c 100644 --- a/drivers/net/wwan/t7xx/t7xx_netdev.c +++ b/drivers/net/wwan/t7xx/t7xx_netdev.c @@ -302,7 +302,7 @@ static int t7xx_ccmni_wwan_newlink(void *ctxt, struct net_device *dev, u32 if_id ccmni->ctlb = ctlb; ccmni->dev = dev; atomic_set(&ccmni->usage, 0); - ctlb->ccmni_inst[if_id] = ccmni; + WRITE_ONCE(ctlb->ccmni_inst[if_id], ccmni); ret = register_netdevice(dev); if (ret) @@ -324,6 +324,7 @@ static void t7xx_ccmni_wwan_dellink(void *ctxt, struct net_device *dev, struct l if (WARN_ON(ctlb->ccmni_inst[if_id] != ccmni)) return; + WRITE_ONCE(ctlb->ccmni_inst[if_id], NULL); 
unregister_netdevice(dev); } @@ -419,7 +420,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu skb_cb = T7XX_SKB_CB(skb); netif_id = skb_cb->netif_idx; - ccmni = ccmni_ctlb->ccmni_inst[netif_id]; + ccmni = READ_ONCE(ccmni_ctlb->ccmni_inst[netif_id]); if (!ccmni) { dev_kfree_skb(skb); return; @@ -441,7 +442,7 @@ static void t7xx_ccmni_recv_skb(struct t7xx_ccmni_ctrl *ccmni_ctlb, struct sk_bu static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno) { - struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0]; + struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]); struct netdev_queue *net_queue; if (netif_running(ccmni->dev) && atomic_read(&ccmni->usage) > 0) { @@ -453,7 +454,7 @@ static void t7xx_ccmni_queue_tx_irq_notify(struct t7xx_ccmni_ctrl *ctlb, int qno static void t7xx_ccmni_queue_tx_full_notify(struct t7xx_ccmni_ctrl *ctlb, int qno) { - struct t7xx_ccmni *ccmni = ctlb->ccmni_inst[0]; + struct t7xx_ccmni *ccmni = READ_ONCE(ctlb->ccmni_inst[0]); struct netdev_queue *net_queue; if (atomic_read(&ccmni->usage) > 0) { @@ -471,7 +472,7 @@ static void t7xx_ccmni_queue_state_notify(struct t7xx_pci_dev *t7xx_dev, if (ctlb->md_sta != MD_STATE_READY) return; - if (!ctlb->ccmni_inst[0]) { + if (!READ_ONCE(ctlb->ccmni_inst[0])) { dev_warn(&t7xx_dev->pdev->dev, "No netdev registered yet\n"); return; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index fc52d5c4c69b70..5091e1fa4a0df6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -985,20 +985,27 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_TX: - get_page(pdata); xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + trace_xdp_exception(queue->info->netdev, prog, act); + break; + } + get_page(pdata); err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0); - if (unlikely(!err)) + if (unlikely(err <= 0)) { + if (err < 
0) + trace_xdp_exception(queue->info->netdev, prog, act); xdp_return_frame_rx_napi(xdpf); - else if (unlikely(err < 0)) - trace_xdp_exception(queue->info->netdev, prog, act); + } break; case XDP_REDIRECT: get_page(pdata); err = xdp_do_redirect(queue->info->netdev, xdp, prog); *need_xdp_flush = true; - if (unlikely(err)) + if (unlikely(err)) { trace_xdp_exception(queue->info->netdev, prog, act); + xdp_return_buff(xdp); + } break; case XDP_PASS: case XDP_DROP: diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index d47dfa80fb9560..4d64b6935bb915 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -102,6 +102,7 @@ config NVME_TCP_TLS depends on NVME_TCP select NET_HANDSHAKE select KEYS + select TLS help Enables TLS encryption for NVMe TCP using the netlink handshake API. diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 2b9e6cfaf2a80a..1a0058be582104 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -145,7 +145,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_BAD_ATTRIBUTES] = "Conflicting Attributes", [NVME_SC_INVALID_PI] = "Invalid Protection Information", [NVME_SC_READ_ONLY] = "Attempted Write to Read Only Range", - [NVME_SC_ONCS_NOT_SUPPORTED] = "ONCS Not Supported", + [NVME_SC_CMD_SIZE_LIM_EXCEEDED ] = "Command Size Limits Exceeded", [NVME_SC_ZONE_BOUNDARY_ERROR] = "Zoned Boundary Error", [NVME_SC_ZONE_FULL] = "Zone Is Full", [NVME_SC_ZONE_READ_ONLY] = "Zone Is Read Only", diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cc23035148b4ba..93a8119ad5ca66 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -286,7 +286,6 @@ static blk_status_t nvme_error_status(u16 status) case NVME_SC_NS_NOT_READY: return BLK_STS_TARGET; case NVME_SC_BAD_ATTRIBUTES: - case NVME_SC_ONCS_NOT_SUPPORTED: case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: case NVME_SC_INVALID_NS: @@ -2059,7 +2058,21 @@ static bool 
nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id, if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else - atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; + atomic_bs = (1 + ns->ctrl->awupf) * bs; + + /* + * Set subsystem atomic bs. + */ + if (ns->ctrl->subsys->atomic_bs) { + if (atomic_bs != ns->ctrl->subsys->atomic_bs) { + dev_err_ratelimited(ns->ctrl->device, + "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n", + ns->disk ? ns->disk->disk_name : "?", + ns->ctrl->subsys->atomic_bs, + atomic_bs); + } + } else + ns->ctrl->subsys->atomic_bs = atomic_bs; nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs); } @@ -2201,6 +2214,17 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; + + /* + * Validate the max atomic write size fits within the subsystem's + * atomic write capabilities. 
+ */ + if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) { + blk_mq_unfreeze_queue(ns->disk->queue, memflags); + ret = -ENXIO; + goto out; + } + nvme_config_discard(ns, &lim); if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) @@ -3031,7 +3055,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) kfree(subsys); return -EINVAL; } - subsys->awupf = le16_to_cpu(id->awupf); nvme_mpath_default_iopolicy(subsys); subsys->dev.class = &nvme_subsys_class; @@ -3441,7 +3464,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - + ctrl->awupf = le16_to_cpu(id->awupf); out_free: kfree(id); return ret; @@ -4295,6 +4318,15 @@ static void nvme_scan_work(struct work_struct *work) nvme_scan_ns_sequential(ctrl); } mutex_unlock(&ctrl->scan_lock); + + /* Requeue if we have missed AENs */ + if (test_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) + nvme_queue_scan(ctrl); +#ifdef CONFIG_NVME_MULTIPATH + else if (ctrl->ana_log_buf) + /* Re-read the ANA log page to not miss updates */ + queue_work(nvme_wq, &ctrl->ana_work); +#endif } /* @@ -4484,7 +4516,8 @@ static void nvme_fw_act_work(struct work_struct *work) msleep(100); } - if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) + if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING) || + !nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) return; nvme_unquiesce_io_queues(ctrl); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index ca86d3bf7ea49d..f29107d95ff26d 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -521,7 +521,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (d.data_len) { ret = nvme_map_user_request(req, d.addr, d.data_len, nvme_to_user_ptr(d.metadata), d.metadata_len, - map_iter, vec); + map_iter, vec ? 
NVME_IOCTL_VEC : 0); if (ret) goto out_free_req; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 89be5911b25d2d..cf0ef4745564c3 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -427,7 +427,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) struct nvme_ns *ns; if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) - return NULL; + return false; list_for_each_entry_srcu(ns, &head->list, siblings, srcu_read_lock_held(&head->srcu)) { @@ -638,7 +638,8 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; - lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL; + lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED; @@ -1050,6 +1051,13 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { + /* + * Ensure that ns path disk node is already added otherwise we + * may get invalid kobj name for target + */ + if (!test_bit(GD_ADDED, &ns->disk->state)) + continue; + /* * Avoid creating link if it already exists for the given path. 
* When path ana state transitions from optimized to non- @@ -1065,13 +1073,6 @@ void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) continue; - /* - * Ensure that ns path disk node is already added otherwise we - * may get invalid kobj name for target - */ - if (!test_bit(GD_ADDED, &ns->disk->state)) - continue; - target = disk_to_dev(ns->disk); /* * Create sysfs link from head gendisk kobject @kobj to the diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 51e07864212710..8fc4683418a3a6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,6 +410,7 @@ struct nvme_ctrl { enum nvme_ctrl_type cntrltype; enum nvme_dctype dctype; + u16 awupf; /* 0's based value. */ }; static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl) @@ -442,11 +443,11 @@ struct nvme_subsystem { u8 cmic; enum nvme_subsys_type subtype; u16 vendor_id; - u16 awupf; /* 0's based awupf value. */ struct ida ns_ida; #ifdef CONFIG_NVME_MULTIPATH enum nvme_iopolicy iopolicy; #endif + u32 atomic_bs; }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b178d52eac1b7f..f1dd804151b1c9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -390,7 +390,7 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, * as it only leads to a small amount of wasted memory for the lifetime of * the I/O. 
*/ -static int nvme_pci_npages_prp(void) +static __always_inline int nvme_pci_npages_prp(void) { unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); @@ -1202,7 +1202,9 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + spin_lock(&nvmeq->cq_poll_lock); nvme_poll_cq(nvmeq, NULL); + spin_unlock(&nvmeq->cq_poll_lock); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); } @@ -3575,7 +3577,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - if (!nvme_try_sched_reset(&dev->ctrl)) + if (nvme_try_sched_reset(&dev->ctrl)) nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } @@ -3623,6 +3625,9 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, }, + { PCI_DEVICE(0x126f, 0x1001), /* Silicon Motion generic */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_BOGUS_NID, }, @@ -3646,6 +3651,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */ .driver_data = NVME_QUIRK_BROKEN_MSI }, + { PCI_DEVICE(0x15b7, 0x5009), /* Sandisk SN550 */ + .driver_data = NVME_QUIRK_BROKEN_MSI | + NVME_QUIRK_NO_DEEPEST_PS }, { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ @@ -3731,6 +3739,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI 
TiPro7000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0x025e, 0xf1ac), /* SOLIDIGM P44 pro SSDPFKKW020X7 */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index cf2d2c5039ddbf..ca6a74607b1397 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -82,8 +82,6 @@ static int nvme_status_to_pr_err(int status) return PR_STS_SUCCESS; case NVME_SC_RESERVATION_CONFLICT: return PR_STS_RESERVATION_CONFLICT; - case NVME_SC_ONCS_NOT_SUPPORTED: - return -EOPNOTSUPP; case NVME_SC_BAD_ATTRIBUTES: case NVME_SC_INVALID_OPCODE: case NVME_SC_INVALID_FIELD: diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6d31226f7a4f84..a5bc3bb483d505 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -306,13 +306,41 @@ static const struct attribute_group nvme_ns_attr_group = { }; #ifdef CONFIG_NVME_MULTIPATH +/* + * NOTE: The dummy attribute does not appear in sysfs. It exists solely to allow + * control over the visibility of the multipath sysfs node. Without at least one + * attribute defined in nvme_ns_mpath_attrs[], the sysfs implementation does not + * invoke the multipath_sysfs_group_visible() method. As a result, we would not + * be able to control the visibility of the multipath sysfs node. 
+ */ +static struct attribute dummy_attr = { + .name = "dummy", +}; + static struct attribute *nvme_ns_mpath_attrs[] = { + &dummy_attr, NULL, }; +static bool multipath_sysfs_group_visible(struct kobject *kobj) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + return nvme_disk_is_ns_head(dev_to_disk(dev)); +} + +static bool multipath_sysfs_attr_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + return false; +} + +DEFINE_SYSFS_GROUP_VISIBLE(multipath_sysfs) + const struct attribute_group nvme_ns_mpath_attr_group = { .name = "multipath", .attrs = nvme_ns_mpath_attrs, + .is_visible = SYSFS_GROUP_VISIBLE(multipath_sysfs), }; #endif diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 26c459f0198d9e..aba365f97cf6b4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1803,6 +1803,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, ret = PTR_ERR(sock_file); goto err_destroy_mutex; } + + sk_net_refcnt_upgrade(queue->sock->sk); nvme_tcp_reclassify_socket(queue->sock); /* Single syn retry */ @@ -1944,7 +1946,7 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) cancel_work_sync(&queue->io_work); } -static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1963,6 +1965,31 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) +{ + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[qid]; + int timeout = 100; + + while (timeout > 0) { + if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || + !sk_wmem_alloc_get(queue->sock->sk)) + return; + msleep(2); + timeout -= 2; + } + dev_warn(nctrl->device, + "qid %d: timeout draining sock 
wmem allocation expired\n", + qid); +} + +static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) +{ + nvme_tcp_stop_queue_nowait(nctrl, qid); + nvme_tcp_wait_queue(nctrl, qid); +} + + static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) { write_lock_bh(&queue->sock->sk->sk_callback_lock); @@ -2030,7 +2057,9 @@ static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) int i; for (i = 1; i < ctrl->queue_count; i++) - nvme_tcp_stop_queue(ctrl, i); + nvme_tcp_stop_queue_nowait(ctrl, i); + for (i = 1; i < ctrl->queue_count; i++) + nvme_tcp_wait_queue(ctrl, i); } static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index fb7446d6d6829b..4c253b433bf78d 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -98,6 +98,7 @@ config NVME_TARGET_TCP_TLS bool "NVMe over Fabrics TCP target TLS encryption support" depends on NVME_TARGET_TCP select NET_HANDSHAKE + select TLS help Enables TLS encryption for the NVMe TCP target using the netlink handshake API. 
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index 0b0645ac5df478..9429b821840856 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -240,7 +240,7 @@ void nvmet_auth_sq_free(struct nvmet_sq *sq) { cancel_delayed_work(&sq->auth_expired_work); #ifdef CONFIG_NVME_TARGET_TCP_TLS - sq->tls_key = 0; + sq->tls_key = NULL; #endif kfree(sq->dhchap_c1); sq->dhchap_c1 = NULL; @@ -600,13 +600,12 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); tls_key = NULL; - kfree_sensitive(tls_psk); } if (sq->ctrl->tls_key) key_put(sq->ctrl->tls_key); sq->ctrl->tls_key = tls_key; #endif - + kfree_sensitive(tls_psk); out_free_digest: kfree_sensitive(digest); out_free_psk: diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 71f8d06998d602..69b1ddff6731fc 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -62,14 +62,7 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno) return NVME_SC_LBA_RANGE | NVME_STATUS_DNR; case -EOPNOTSUPP: req->error_loc = offsetof(struct nvme_common_command, opcode); - switch (req->cmd->common.opcode) { - case nvme_cmd_dsm: - case nvme_cmd_write_zeroes: - return NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR; - default: - return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; - } - break; + return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; case -ENODATA: req->error_loc = offsetof(struct nvme_rw_command, nsid); return NVME_SC_ACCESS_DENIED; @@ -324,6 +317,9 @@ int nvmet_enable_port(struct nvmet_port *port) lockdep_assert_held(&nvmet_config_sem); + if (port->disc_addr.trtype == NVMF_TRTYPE_MAX) + return -EINVAL; + ops = nvmet_transports[port->disc_addr.trtype]; if (!ops) { up_write(&nvmet_config_sem); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 7318b736d41417..7b50130f10f657 100644 --- a/drivers/nvme/target/fc.c +++ 
b/drivers/nvme/target/fc.c @@ -995,16 +995,6 @@ nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) return kref_get_unless_zero(&hostport->ref); } -static void -nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) -{ - /* if LLDD not implemented, leave as NULL */ - if (!hostport || !hostport->hosthandle) - return; - - nvmet_fc_hostport_put(hostport); -} - static struct nvmet_fc_hostport * nvmet_fc_match_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { @@ -1028,33 +1018,24 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) struct nvmet_fc_hostport *newhost, *match = NULL; unsigned long flags; + /* + * Caller holds a reference on tgtport. + */ + /* if LLDD not implemented, leave as NULL */ if (!hosthandle) return NULL; - /* - * take reference for what will be the newly allocated hostport if - * we end up using a new allocation - */ - if (!nvmet_fc_tgtport_get(tgtport)) - return ERR_PTR(-EINVAL); - spin_lock_irqsave(&tgtport->lock, flags); match = nvmet_fc_match_hostport(tgtport, hosthandle); spin_unlock_irqrestore(&tgtport->lock, flags); - if (match) { - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); + if (match) return match; - } newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); - if (!newhost) { - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); + if (!newhost) return ERR_PTR(-ENOMEM); - } spin_lock_irqsave(&tgtport->lock, flags); match = nvmet_fc_match_hostport(tgtport, hosthandle); @@ -1063,6 +1044,7 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) kfree(newhost); newhost = match; } else { + nvmet_fc_tgtport_get(tgtport); newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; INIT_LIST_HEAD(&newhost->host_list); @@ -1075,13 +1057,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) return newhost; } -static void -nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) -{ - 
nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); -} - static void nvmet_fc_delete_assoc_work(struct work_struct *work) { @@ -1089,7 +1064,8 @@ nvmet_fc_delete_assoc_work(struct work_struct *work) container_of(work, struct nvmet_fc_tgt_assoc, del_work); struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_assoc(assoc); + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); nvmet_fc_tgtport_put(tgtport); } @@ -1097,7 +1073,8 @@ static void nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) { nvmet_fc_tgtport_get(assoc->tgtport); - queue_work(nvmet_wq, &assoc->del_work); + if (!queue_work(nvmet_wq, &assoc->del_work)) + nvmet_fc_tgtport_put(assoc->tgtport); } static bool @@ -1143,6 +1120,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) goto out_ida; assoc->tgtport = tgtport; + nvmet_fc_tgtport_get(tgtport); assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); @@ -1190,7 +1168,7 @@ nvmet_fc_target_assoc_free(struct kref *ref) /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); - nvmet_fc_free_hostport(assoc->hostport); + nvmet_fc_hostport_put(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1244,6 +1222,8 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); + + nvmet_fc_tgtport_put(tgtport); } static struct nvmet_fc_tgt_assoc * @@ -1455,11 +1435,6 @@ nvmet_fc_free_tgtport(struct kref *ref) struct nvmet_fc_tgtport *tgtport = container_of(ref, struct nvmet_fc_tgtport, ref); struct device *dev = tgtport->dev; - unsigned long flags; - - spin_lock_irqsave(&nvmet_fc_tgtlock, flags); - list_del(&tgtport->tgt_list); - spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); nvmet_fc_free_ls_iodlist(tgtport); @@ -1620,6 +1595,11 
@@ int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + unsigned long flags; + + spin_lock_irqsave(&nvmet_fc_tgtlock, flags); + list_del(&tgtport->tgt_list); + spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); nvmet_fc_portentry_unbind_tgt(tgtport); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index e1abb27927ff74..20becea1ad9683 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -208,6 +208,7 @@ struct fcloop_lport { struct nvme_fc_local_port *localport; struct list_head lport_list; struct completion unreg_done; + refcount_t ref; }; struct fcloop_lport_priv { @@ -239,7 +240,7 @@ struct fcloop_nport { struct fcloop_tport *tport; struct fcloop_lport *lport; struct list_head nport_list; - struct kref ref; + refcount_t ref; u64 node_name; u64 port_name; u32 port_role; @@ -274,7 +275,7 @@ struct fcloop_fcpreq { u32 inistate; bool active; bool aborted; - struct kref ref; + refcount_t ref; struct work_struct fcp_rcv_work; struct work_struct abort_rcv_work; struct work_struct tio_done_work; @@ -478,7 +479,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, if (targetport) { tport = targetport->private; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); } @@ -534,24 +535,18 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) } static void -fcloop_tfcp_req_free(struct kref *ref) +fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) { - struct fcloop_fcpreq *tfcp_req = - container_of(ref, struct fcloop_fcpreq, ref); + if (!refcount_dec_and_test(&tfcp_req->ref)) + return; kfree(tfcp_req); } -static void -fcloop_tfcp_req_put(struct fcloop_fcpreq *tfcp_req) -{ - kref_put(&tfcp_req->ref, fcloop_tfcp_req_free); -} - static int fcloop_tfcp_req_get(struct 
fcloop_fcpreq *tfcp_req) { - return kref_get_unless_zero(&tfcp_req->ref); + return refcount_inc_not_zero(&tfcp_req->ref); } static void @@ -623,12 +618,13 @@ fcloop_fcp_recv_work(struct work_struct *work) { struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, fcp_rcv_work); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; unsigned long flags; int ret = 0; bool aborted = false; spin_lock_irqsave(&tfcp_req->reqlock, flags); + fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_START: tfcp_req->inistate = INI_IO_ACTIVE; @@ -643,16 +639,19 @@ fcloop_fcp_recv_work(struct work_struct *work) } spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - if (unlikely(aborted)) - ret = -ECANCELED; - else { - if (likely(!check_for_drop(tfcp_req))) - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, - &tfcp_req->tgt_fcp_req, - fcpreq->cmdaddr, fcpreq->cmdlen); - else - pr_info("%s: dropped command ********\n", __func__); + if (unlikely(aborted)) { + /* the abort handler will call fcloop_call_host_done */ + return; } + + if (unlikely(check_for_drop(tfcp_req))) { + pr_info("%s: dropped command ********\n", __func__); + return; + } + + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + &tfcp_req->tgt_fcp_req, + fcpreq->cmdaddr, fcpreq->cmdlen); if (ret) fcloop_call_host_done(fcpreq, tfcp_req, ret); } @@ -667,9 +666,10 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) unsigned long flags; spin_lock_irqsave(&tfcp_req->reqlock, flags); - fcpreq = tfcp_req->fcpreq; switch (tfcp_req->inistate) { case INI_IO_ABORTED: + fcpreq = tfcp_req->fcpreq; + tfcp_req->fcpreq = NULL; break; case INI_IO_COMPLETED: completed = true; @@ -691,10 +691,6 @@ fcloop_fcp_abort_recv_work(struct work_struct *work) nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport, &tfcp_req->tgt_fcp_req); - spin_lock_irqsave(&tfcp_req->reqlock, flags); - tfcp_req->fcpreq = NULL; - spin_unlock_irqrestore(&tfcp_req->reqlock, flags); - 
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED); /* call_host_done releases reference for abort downcall */ } @@ -748,7 +744,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, INIT_WORK(&tfcp_req->fcp_rcv_work, fcloop_fcp_recv_work); INIT_WORK(&tfcp_req->abort_rcv_work, fcloop_fcp_abort_recv_work); INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); - kref_init(&tfcp_req->ref); + refcount_set(&tfcp_req->ref, 1); queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); @@ -1001,24 +997,39 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, } static void -fcloop_nport_free(struct kref *ref) +fcloop_lport_put(struct fcloop_lport *lport) { - struct fcloop_nport *nport = - container_of(ref, struct fcloop_nport, ref); + unsigned long flags; - kfree(nport); + if (!refcount_dec_and_test(&lport->ref)) + return; + + spin_lock_irqsave(&fcloop_lock, flags); + list_del(&lport->lport_list); + spin_unlock_irqrestore(&fcloop_lock, flags); + + kfree(lport); +} + +static int +fcloop_lport_get(struct fcloop_lport *lport) +{ + return refcount_inc_not_zero(&lport->ref); } static void fcloop_nport_put(struct fcloop_nport *nport) { - kref_put(&nport->ref, fcloop_nport_free); + if (!refcount_dec_and_test(&nport->ref)) + return; + + kfree(nport); } static int fcloop_nport_get(struct fcloop_nport *nport) { - return kref_get_unless_zero(&nport->ref); + return refcount_inc_not_zero(&nport->ref); } static void @@ -1029,6 +1040,8 @@ fcloop_localport_delete(struct nvme_fc_local_port *localport) /* release any threads waiting for the unreg to complete */ complete(&lport->unreg_done); + + fcloop_lport_put(lport); } static void @@ -1140,6 +1153,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr, lport->localport = localport; INIT_LIST_HEAD(&lport->lport_list); + refcount_set(&lport->ref, 1); spin_lock_irqsave(&fcloop_lock, flags); list_add_tail(&lport->lport_list, &fcloop_lports); @@ -1156,13 +1170,6 @@ fcloop_create_local_port(struct device *dev, 
struct device_attribute *attr, return ret ? ret : count; } - -static void -__unlink_local_port(struct fcloop_lport *lport) -{ - list_del(&lport->lport_list); -} - static int __wait_localport_unreg(struct fcloop_lport *lport) { @@ -1175,8 +1182,6 @@ __wait_localport_unreg(struct fcloop_lport *lport) if (!ret) wait_for_completion(&lport->unreg_done); - kfree(lport); - return ret; } @@ -1199,8 +1204,9 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, list_for_each_entry(tlport, &fcloop_lports, lport_list) { if (tlport->localport->node_name == nodename && tlport->localport->port_name == portname) { + if (!fcloop_lport_get(tlport)) + break; lport = tlport; - __unlink_local_port(lport); break; } } @@ -1210,6 +1216,7 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr, return -ENOENT; ret = __wait_localport_unreg(lport); + fcloop_lport_put(lport); return ret ? ret : count; } @@ -1249,7 +1256,7 @@ fcloop_alloc_nport(const char *buf, size_t count, bool remoteport) newnport->port_role = opts->roles; if (opts->mask & NVMF_OPT_FCADDR) newnport->port_id = opts->fcaddr; - kref_init(&newnport->ref); + refcount_set(&newnport->ref, 1); spin_lock_irqsave(&fcloop_lock, flags); @@ -1637,17 +1644,17 @@ static void __exit fcloop_exit(void) for (;;) { lport = list_first_entry_or_null(&fcloop_lports, typeof(*lport), lport_list); - if (!lport) + if (!lport || !fcloop_lport_get(lport)) break; - __unlink_local_port(lport); - spin_unlock_irqrestore(&fcloop_lock, flags); ret = __wait_localport_unreg(lport); if (ret) pr_warn("%s: Failed deleting local port\n", __func__); + fcloop_lport_put(lport); + spin_lock_irqsave(&fcloop_lock, flags); } diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 83be0657e6df4e..1cfa13d029bfa2 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -145,15 +145,8 @@ u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) 
req->error_loc = offsetof(struct nvme_rw_command, slba); break; case BLK_STS_NOTSUPP: + status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; req->error_loc = offsetof(struct nvme_common_command, opcode); - switch (req->cmd->common.opcode) { - case nvme_cmd_dsm: - case nvme_cmd_write_zeroes: - status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_STATUS_DNR; - break; - default: - status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; - } break; case BLK_STS_MEDIUM: status = NVME_SC_ACCESS_DENIED; diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 51c27b32248d01..7123c855b5a67c 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -62,8 +62,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); #define NVMET_PCI_EPF_CQ_RETRY_INTERVAL msecs_to_jiffies(1) enum nvmet_pci_epf_queue_flags { - NVMET_PCI_EPF_Q_IS_SQ = 0, /* The queue is a submission queue */ - NVMET_PCI_EPF_Q_LIVE, /* The queue is live */ + NVMET_PCI_EPF_Q_LIVE = 0, /* The queue is live */ NVMET_PCI_EPF_Q_IRQ_ENABLED, /* IRQ is enabled for this queue */ }; @@ -596,9 +595,6 @@ static bool nvmet_pci_epf_should_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct nvmet_pci_epf_irq_vector *iv = cq->iv; bool ret; - if (!test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) - return false; - /* IRQ coalescing for the admin queue is not allowed. */ if (!cq->qid) return true; @@ -625,7 +621,8 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, struct pci_epf *epf = nvme_epf->epf; int ret = 0; - if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags)) + if (!test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) || + !test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) return; mutex_lock(&ctrl->irq_lock); @@ -636,14 +633,16 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, switch (nvme_epf->irq_type) { case PCI_IRQ_MSIX: case PCI_IRQ_MSI: + /* + * If we fail to raise an MSI or MSI-X interrupt, it is likely + * because the host is using legacy INTX IRQs (e.g. 
BIOS, + * grub), but we can fallback to the INTX type only if the + * endpoint controller supports this type. + */ ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, nvme_epf->irq_type, cq->vector + 1); - if (!ret) + if (!ret || !nvme_epf->epc_features->intx_capable) break; - /* - * If we got an error, it is likely because the host is using - * legacy IRQs (e.g. BIOS, grub). - */ fallthrough; case PCI_IRQ_INTX: ret = pci_epc_raise_irq(epf->epc, epf->func_no, epf->vfunc_no, @@ -656,7 +655,9 @@ static void nvmet_pci_epf_raise_irq(struct nvmet_pci_epf_ctrl *ctrl, } if (ret) - dev_err(ctrl->dev, "Failed to raise IRQ (err=%d)\n", ret); + dev_err_ratelimited(ctrl->dev, + "CQ[%u]: Failed to raise IRQ (err=%d)\n", + cq->qid, ret); unlock: mutex_unlock(&ctrl->irq_lock); @@ -1319,8 +1320,14 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl, set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags); - dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", - cqid, qsize, cq->qes, cq->vector); + if (test_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ vector %u\n", + cqid, qsize, cq->qes, cq->vector); + else + dev_dbg(ctrl->dev, + "CQ[%u]: %u entries of %zu B, IRQ disabled\n", + cqid, qsize, cq->qes); return NVME_SC_SUCCESS; @@ -1344,7 +1351,8 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid) cancel_delayed_work_sync(&cq->work); nvmet_pci_epf_drain_queue(cq); - nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); + if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags)) + nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector); nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map); return NVME_SC_SUCCESS; @@ -1533,7 +1541,6 @@ static void nvmet_pci_epf_init_queue(struct nvmet_pci_epf_ctrl *ctrl, if (sq) { queue = &ctrl->sq[qid]; - set_bit(NVMET_PCI_EPF_Q_IS_SQ, &queue->flags); } else { queue = &ctrl->cq[qid]; INIT_DELAYED_WORK(&queue->work, nvmet_pci_epf_cq_work); @@ -1648,16 
+1655,17 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl, { struct nvmet_pci_epf_iod *iod; int ret, n = 0; + u16 head = sq->head; sq->tail = nvmet_pci_epf_bar_read32(ctrl, sq->db); - while (sq->head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { + while (head != sq->tail && (!ctrl->sq_ab || n < ctrl->sq_ab)) { iod = nvmet_pci_epf_alloc_iod(sq); if (!iod) break; /* Get the NVMe command submitted by the host. */ ret = nvmet_pci_epf_transfer(ctrl, &iod->cmd, - sq->pci_addr + sq->head * sq->qes, + sq->pci_addr + head * sq->qes, sq->qes, DMA_FROM_DEVICE); if (ret) { /* Not much we can do... */ @@ -1666,12 +1674,13 @@ static int nvmet_pci_epf_process_sq(struct nvmet_pci_epf_ctrl *ctrl, } dev_dbg(ctrl->dev, "SQ[%u]: head %u, tail %u, command %s\n", - sq->qid, sq->head, sq->tail, + sq->qid, head, sq->tail, nvmet_pci_epf_iod_name(iod)); - sq->head++; - if (sq->head == sq->depth) - sq->head = 0; + head++; + if (head == sq->depth) + head = 0; + WRITE_ONCE(sq->head, head); n++; queue_work_on(WORK_CPU_UNBOUND, sq->iod_wq, &iod->work); @@ -1761,8 +1770,17 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) if (!iod) break; - /* Post the IOD completion entry. */ + /* + * Post the IOD completion entry. If the IOD request was + * executed (req->execute() called), the CQE is already + * initialized. However, the IOD may have been failed before + * that, leaving the CQE not properly initialized. So always + * initialize it here. + */ cqe = &iod->cqe; + cqe->sq_head = cpu_to_le16(READ_ONCE(iod->sq->head)); + cqe->sq_id = cpu_to_le16(iod->sq->qid); + cqe->command_id = iod->cmd.common.command_id; cqe->status = cpu_to_le16((iod->status << 1) | cq->phase); dev_dbg(ctrl->dev, @@ -1800,6 +1818,21 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work) NVMET_PCI_EPF_CQ_RETRY_INTERVAL); } +static void nvmet_pci_epf_clear_ctrl_config(struct nvmet_pci_epf_ctrl *ctrl) +{ + struct nvmet_ctrl *tctrl = ctrl->tctrl; + + /* Initialize controller status. 
*/ + tctrl->csts = 0; + ctrl->csts = 0; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); + + /* Initialize controller configuration and start polling. */ + tctrl->cc = 0; + ctrl->cc = 0; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); +} + static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { u64 pci_addr, asq, acq; @@ -1865,18 +1898,20 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) return 0; err: - ctrl->csts = 0; + nvmet_pci_epf_clear_ctrl_config(ctrl); return -EINVAL; } -static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) +static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl, + bool shutdown) { int qid; if (!ctrl->enabled) return; - dev_info(ctrl->dev, "Disabling controller\n"); + dev_info(ctrl->dev, "%s controller\n", + shutdown ? "Shutting down" : "Disabling"); ctrl->enabled = false; cancel_delayed_work_sync(&ctrl->poll_sqs); @@ -1893,6 +1928,11 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); ctrl->csts &= ~NVME_CSTS_RDY; + if (shutdown) { + ctrl->csts |= NVME_CSTS_SHST_CMPLT; + ctrl->cc &= ~NVME_CC_ENABLE; + nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); + } } static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) @@ -1919,12 +1959,10 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) } if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) - nvmet_pci_epf_disable_ctrl(ctrl); + nvmet_pci_epf_disable_ctrl(ctrl, false); - if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) { - nvmet_pci_epf_disable_ctrl(ctrl); - ctrl->csts |= NVME_CSTS_SHST_CMPLT; - } + if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) + nvmet_pci_epf_disable_ctrl(ctrl, true); if (!nvmet_cc_shn(new_cc) && nvmet_cc_shn(old_cc)) ctrl->csts &= ~NVME_CSTS_SHST_CMPLT; @@ -1963,16 +2001,10 @@ static void nvmet_pci_epf_init_bar(struct nvmet_pci_epf_ctrl *ctrl) /* Clear Controller Memory Buffer 
Supported (CMBS). */ ctrl->cap &= ~(0x1ULL << 57); - /* Controller configuration. */ - ctrl->cc = tctrl->cc & (~NVME_CC_ENABLE); - - /* Controller status. */ - ctrl->csts = ctrl->tctrl->csts; - nvmet_pci_epf_bar_write64(ctrl, NVME_REG_CAP, ctrl->cap); nvmet_pci_epf_bar_write32(ctrl, NVME_REG_VS, tctrl->subsys->ver); - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); - nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CC, ctrl->cc); + + nvmet_pci_epf_clear_ctrl_config(ctrl); } static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf, @@ -2070,14 +2102,22 @@ static int nvmet_pci_epf_create_ctrl(struct nvmet_pci_epf *nvme_epf, static void nvmet_pci_epf_start_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { + + dev_info(ctrl->dev, "PCI link up\n"); + ctrl->link_up = true; + schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL); } static void nvmet_pci_epf_stop_ctrl(struct nvmet_pci_epf_ctrl *ctrl) { + dev_info(ctrl->dev, "PCI link down\n"); + ctrl->link_up = false; + cancel_delayed_work_sync(&ctrl->poll_cc); - nvmet_pci_epf_disable_ctrl(ctrl); + nvmet_pci_epf_disable_ctrl(ctrl, false); + nvmet_pci_epf_clear_ctrl_config(ctrl); } static void nvmet_pci_epf_destroy_ctrl(struct nvmet_pci_epf_ctrl *ctrl) @@ -2300,10 +2340,8 @@ static int nvmet_pci_epf_epc_init(struct pci_epf *epf) if (ret) goto out_clear_bar; - if (!epc_features->linkup_notifier) { - ctrl->link_up = true; + if (!epc_features->linkup_notifier) nvmet_pci_epf_start_ctrl(&nvme_epf->ctrl); - } return 0; @@ -2319,7 +2357,6 @@ static void nvmet_pci_epf_epc_deinit(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = false; nvmet_pci_epf_destroy_ctrl(ctrl); nvmet_pci_epf_deinit_dma(nvme_epf); @@ -2331,7 +2368,6 @@ static int nvmet_pci_epf_link_up(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = true; 
nvmet_pci_epf_start_ctrl(ctrl); return 0; @@ -2342,7 +2378,6 @@ static int nvmet_pci_epf_link_down(struct pci_epf *epf) struct nvmet_pci_epf *nvme_epf = epf_get_drvdata(epf); struct nvmet_pci_epf_ctrl *ctrl = &nvme_epf->ctrl; - ctrl->link_up = false; nvmet_pci_epf_stop_ctrl(ctrl); return 0; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f2d0c920269b94..12a5cb8641ca30 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1560,6 +1560,9 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) { struct socket *sock = queue->sock; + if (!queue->state_change) + return; + write_lock_bh(&sock->sk->sk_callback_lock); sock->sk->sk_data_ready = queue->data_ready; sock->sk->sk_state_change = queue->state_change; diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig index 8671b7c974b933..eceb3cdb421ffb 100644 --- a/drivers/nvmem/Kconfig +++ b/drivers/nvmem/Kconfig @@ -260,6 +260,7 @@ config NVMEM_RCAR_EFUSE config NVMEM_RMEM tristate "Reserved Memory Based Driver Support" depends on HAS_IOMEM + select CRC32 help This driver maps reserved memory into an nvmem device. It might be useful to expose information left by firmware in memory. 
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index fff85bbf0ecd0f..e206efc29a0044 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -594,9 +594,11 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, cell->nbits = info->nbits; cell->np = info->np; - if (cell->nbits) + if (cell->nbits) { cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, BITS_PER_BYTE); + cell->raw_len = ALIGN(cell->bytes, nvmem->word_size); + } if (!IS_ALIGNED(cell->offset, nvmem->stride)) { dev_err(&nvmem->dev, @@ -605,6 +607,18 @@ static int nvmem_cell_info_to_nvmem_cell_entry_nodup(struct nvmem_device *nvmem, return -EINVAL; } + if (!IS_ALIGNED(cell->raw_len, nvmem->word_size)) { + dev_err(&nvmem->dev, + "cell %s raw len %zd unaligned to nvmem word size %d\n", + cell->name ?: "", cell->raw_len, + nvmem->word_size); + + if (info->raw_len) + return -EINVAL; + + cell->raw_len = ALIGN(cell->raw_len, nvmem->word_size); + } + return 0; } @@ -837,7 +851,9 @@ static int nvmem_add_cells_from_dt(struct nvmem_device *nvmem, struct device_nod if (addr && len == (2 * sizeof(u32))) { info.bit_offset = be32_to_cpup(addr++); info.nbits = be32_to_cpup(addr); - if (info.bit_offset >= BITS_PER_BYTE || info.nbits < 1) { + if (info.bit_offset >= BITS_PER_BYTE * info.bytes || + info.nbits < 1 || + info.bit_offset + info.nbits > BITS_PER_BYTE * info.bytes) { dev_err(dev, "nvmem: invalid bits on %pOF\n", child); of_node_put(child); return -EINVAL; @@ -1630,21 +1646,29 @@ EXPORT_SYMBOL_GPL(nvmem_cell_put); static void nvmem_shift_read_buffer_in_place(struct nvmem_cell_entry *cell, void *buf) { u8 *p, *b; - int i, extra, bit_offset = cell->bit_offset; + int i, extra, bytes_offset; + int bit_offset = cell->bit_offset; p = b = buf; - if (bit_offset) { + + bytes_offset = bit_offset / BITS_PER_BYTE; + b += bytes_offset; + bit_offset %= BITS_PER_BYTE; + + if (bit_offset % BITS_PER_BYTE) { /* First shift */ - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; /* 
setup rest of the bytes if any */ for (i = 1; i < cell->bytes; i++) { /* Get bits from next byte and shift them towards msb */ - *p |= *b << (BITS_PER_BYTE - bit_offset); + *p++ |= *b << (BITS_PER_BYTE - bit_offset); - p = b; - *b++ >>= bit_offset; + *p = *b++ >> bit_offset; } + } else if (p != b) { + memmove(p, b, cell->bytes - bytes_offset); + p += cell->bytes - 1; } else { /* point to the msb */ p += cell->bytes - 1; diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c index 116a39e804c70b..a872c640b8c5a5 100644 --- a/drivers/nvmem/qfprom.c +++ b/drivers/nvmem/qfprom.c @@ -321,19 +321,32 @@ static int qfprom_reg_read(void *context, unsigned int reg, void *_val, size_t bytes) { struct qfprom_priv *priv = context; - u8 *val = _val; - int i = 0, words = bytes; + u32 *val = _val; void __iomem *base = priv->qfpcorrected; + int words = DIV_ROUND_UP(bytes, sizeof(u32)); + int i; if (read_raw_data && priv->qfpraw) base = priv->qfpraw; - while (words--) - *val++ = readb(base + reg + i++); + for (i = 0; i < words; i++) + *val++ = readl(base + reg + i * sizeof(u32)); return 0; } +/* Align reads to word boundary */ +static void qfprom_fixup_dt_cell_info(struct nvmem_device *nvmem, + struct nvmem_cell_info *cell) +{ + unsigned int byte_offset = cell->offset % sizeof(u32); + + cell->bit_offset += byte_offset * BITS_PER_BYTE; + cell->offset -= byte_offset; + if (byte_offset && !cell->nbits) + cell->nbits = cell->bytes * BITS_PER_BYTE; +} + static void qfprom_runtime_disable(void *data) { pm_runtime_disable(data); @@ -358,10 +371,11 @@ static int qfprom_probe(struct platform_device *pdev) struct nvmem_config econfig = { .name = "qfprom", .add_legacy_fixed_of_cells = true, - .stride = 1, - .word_size = 1, + .stride = 4, + .word_size = 4, .id = NVMEM_DEVID_AUTO, .reg_read = qfprom_reg_read, + .fixup_dt_cell_info = qfprom_fixup_dt_cell_info, }; struct device *dev = &pdev->dev; struct resource *res; diff --git a/drivers/nvmem/rockchip-otp.c b/drivers/nvmem/rockchip-otp.c 
index ebc3f0b24166bc..d88f12c5324264 100644 --- a/drivers/nvmem/rockchip-otp.c +++ b/drivers/nvmem/rockchip-otp.c @@ -59,7 +59,6 @@ #define RK3588_OTPC_AUTO_EN 0x08 #define RK3588_OTPC_INT_ST 0x84 #define RK3588_OTPC_DOUT0 0x20 -#define RK3588_NO_SECURE_OFFSET 0x300 #define RK3588_NBYTES 4 #define RK3588_BURST_NUM 1 #define RK3588_BURST_SHIFT 8 @@ -69,6 +68,7 @@ struct rockchip_data { int size; + int read_offset; const char * const *clks; int num_clks; nvmem_reg_read_t reg_read; @@ -196,7 +196,7 @@ static int rk3588_otp_read(void *context, unsigned int offset, addr_start = round_down(offset, RK3588_NBYTES) / RK3588_NBYTES; addr_end = round_up(offset + bytes, RK3588_NBYTES) / RK3588_NBYTES; addr_len = addr_end - addr_start; - addr_start += RK3588_NO_SECURE_OFFSET; + addr_start += otp->data->read_offset / RK3588_NBYTES; buf = kzalloc(array_size(addr_len, RK3588_NBYTES), GFP_KERNEL); if (!buf) @@ -274,12 +274,21 @@ static const struct rockchip_data px30_data = { .reg_read = px30_otp_read, }; +static const struct rockchip_data rk3576_data = { + .size = 0x100, + .read_offset = 0x700, + .clks = px30_otp_clocks, + .num_clks = ARRAY_SIZE(px30_otp_clocks), + .reg_read = rk3588_otp_read, +}; + static const char * const rk3588_otp_clocks[] = { "otp", "apb_pclk", "phy", "arb", }; static const struct rockchip_data rk3588_data = { .size = 0x400, + .read_offset = 0xc00, .clks = rk3588_otp_clocks, .num_clks = ARRAY_SIZE(rk3588_otp_clocks), .reg_read = rk3588_otp_read, @@ -294,6 +303,10 @@ static const struct of_device_id rockchip_otp_match[] = { .compatible = "rockchip,rk3308-otp", .data = &px30_data, }, + { + .compatible = "rockchip,rk3576-otp", + .data = &rk3576_data, + }, { .compatible = "rockchip,rk3588-otp", .data = &rk3588_data, diff --git a/drivers/nvmem/zynqmp_nvmem.c b/drivers/nvmem/zynqmp_nvmem.c index 8682adaacd692d..7da717d6c7faf3 100644 --- a/drivers/nvmem/zynqmp_nvmem.c +++ b/drivers/nvmem/zynqmp_nvmem.c @@ -213,6 +213,7 @@ static int zynqmp_nvmem_probe(struct 
platform_device *pdev) econfig.word_size = 1; econfig.size = ZYNQMP_NVMEM_SIZE; econfig.dev = dev; + econfig.priv = dev; econfig.add_legacy_fixed_of_cells = true; econfig.reg_read = zynqmp_nvmem_read; econfig.reg_write = zynqmp_nvmem_write; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 64d301893af7b8..eeb370e0f50777 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2029,15 +2029,16 @@ static int __init unittest_data_add(void) rc = of_resolve_phandles(unittest_data_node); if (rc) { pr_err("%s: Failed to resolve phandles (rc=%i)\n", __func__, rc); - of_overlay_mutex_unlock(); - return -EINVAL; + rc = -EINVAL; + goto unlock; } /* attach the sub-tree to live tree */ if (!of_root) { pr_warn("%s: no live tree to attach sub-tree\n", __func__); kfree(unittest_data); - return -ENODEV; + rc = -ENODEV; + goto unlock; } EXPECT_BEGIN(KERN_INFO, @@ -2056,9 +2057,10 @@ static int __init unittest_data_add(void) EXPECT_END(KERN_INFO, "Duplicate name in testcase-data, renamed to \"duplicate-name#1\""); +unlock: of_overlay_mutex_unlock(); - return 0; + return rc; } #ifdef CONFIG_OF_OVERLAY diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile index 038ccbd9e3ba23..de5e4f5145af8d 100644 --- a/drivers/pci/controller/Makefile +++ b/drivers/pci/controller/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef CONFIG_X86_64 +ifdef CONFIG_SATA_AHCI +obj-y += intel-nvme-remap.o +endif +endif + obj-$(CONFIG_PCIE_CADENCE) += cadence/ obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o obj-$(CONFIG_PCI_IXP4XX) += pci-ixp4xx.o diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c index 8af95e9da7cec6..741e10a575ec75 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-host.c +++ b/drivers/pci/controller/cadence/pcie-cadence-host.c @@ -570,14 +570,5 @@ int cdns_pcie_host_setup(struct cdns_pcie_rc *rc) if (!bridge->ops) bridge->ops = &cdns_pcie_host_ops; - ret = 
pci_host_probe(bridge); - if (ret < 0) - goto err_init; - - return 0; - - err_init: - pm_runtime_put_sync(dev); - - return ret; + return pci_host_probe(bridge); } diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 5f267dd261b51e..ea5c06371171ff 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -129,6 +129,11 @@ struct imx_pcie_drvdata { const struct dw_pcie_host_ops *ops; }; +struct imx_lut_data { + u32 data1; + u32 data2; +}; + struct imx_pcie { struct dw_pcie *pci; struct gpio_desc *reset_gpiod; @@ -148,6 +153,8 @@ struct imx_pcie { struct regulator *vph; void __iomem *phy_base; + /* LUT data for pcie */ + struct imx_lut_data luts[IMX95_MAX_LUT]; /* power domain for pcie */ struct device *pd_pcie; /* power domain for pcie phy */ @@ -1386,6 +1393,42 @@ static void imx_pcie_msi_save_restore(struct imx_pcie *imx_pcie, bool save) } } +static void imx_pcie_lut_save(struct imx_pcie *imx_pcie) +{ + u32 data1, data2; + int i; + + for (i = 0; i < IMX95_MAX_LUT; i++) { + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, + IMX95_PEO_LUT_RWA | i); + regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, &data1); + regmap_read(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, &data2); + if (data1 & IMX95_PE0_LUT_VLD) { + imx_pcie->luts[i].data1 = data1; + imx_pcie->luts[i].data2 = data2; + } else { + imx_pcie->luts[i].data1 = 0; + imx_pcie->luts[i].data2 = 0; + } + } +} + +static void imx_pcie_lut_restore(struct imx_pcie *imx_pcie) +{ + int i; + + for (i = 0; i < IMX95_MAX_LUT; i++) { + if ((imx_pcie->luts[i].data1 & IMX95_PE0_LUT_VLD) == 0) + continue; + + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA1, + imx_pcie->luts[i].data1); + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_DATA2, + imx_pcie->luts[i].data2); + regmap_write(imx_pcie->iomuxc_gpr, IMX95_PE0_LUT_ACSCTRL, i); + } +} + static int imx_pcie_suspend_noirq(struct device *dev) { struct imx_pcie *imx_pcie = 
dev_get_drvdata(dev); @@ -1394,6 +1437,8 @@ static int imx_pcie_suspend_noirq(struct device *dev) return 0; imx_pcie_msi_save_restore(imx_pcie, true); + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_HAS_LUT)) + imx_pcie_lut_save(imx_pcie); if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_BROKEN_SUSPEND)) { /* * The minimum for a workaround would be to set PERST# and to @@ -1438,6 +1483,8 @@ static int imx_pcie_resume_noirq(struct device *dev) if (ret) return ret; } + if (imx_check_flag(imx_pcie, IMX_PCIE_FLAG_HAS_LUT)) + imx_pcie_lut_restore(imx_pcie); imx_pcie_msi_save_restore(imx_pcie, false); return 0; diff --git a/drivers/pci/controller/dwc/pcie-rcar-gen4.c b/drivers/pci/controller/dwc/pcie-rcar-gen4.c index fc872dd35029c0..02638ec442e701 100644 --- a/drivers/pci/controller/dwc/pcie-rcar-gen4.c +++ b/drivers/pci/controller/dwc/pcie-rcar-gen4.c @@ -403,6 +403,7 @@ static const struct pci_epc_features rcar_gen4_pcie_epc_features = { .msix_capable = false, .bar[BAR_1] = { .type = BAR_RESERVED, }, .bar[BAR_3] = { .type = BAR_RESERVED, }, + .bar[BAR_4] = { .type = BAR_FIXED, .fixed_size = 256 }, .bar[BAR_5] = { .type = BAR_RESERVED, }, .align = SZ_1M, }; diff --git a/drivers/pci/controller/intel-nvme-remap.c b/drivers/pci/controller/intel-nvme-remap.c new file mode 100644 index 00000000000000..e105e6f5cc91d1 --- /dev/null +++ b/drivers/pci/controller/intel-nvme-remap.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel remapped NVMe device support. + * + * Copyright (c) 2019 Endless Mobile, Inc. + * Author: Daniel Drake + * + * Some products ship by default with the SATA controller in "RAID" or + * "Intel RST Premium With Intel Optane System Acceleration" mode. Under this + * mode, which we refer to as "remapped NVMe" mode, any installed NVMe + * devices disappear from the PCI bus, and instead their I/O memory becomes + * available within the AHCI device BARs. 
+ * + * This scheme is understood to be a way of avoiding usage of the standard + * Windows NVMe driver under that OS, instead mandating usage of Intel's + * driver, which has better power management, and presumably offers + * some RAID/disk-caching solutions too. + * + * Here in this driver, we support the remapped NVMe mode by claiming the + * AHCI device and creating a fake PCIe root port. On the new bus, the + * original AHCI device is exposed with only minor tweaks. Then, fake PCI + * devices corresponding to the remapped NVMe devices are created. The usual + * ahci and nvme drivers are then expected to bind to these devices and + * operate as normal. + * + * The PCI configuration space for the NVMe devices is completely + * unavailable, so we fake a minimal one and hope for the best. + * + * Interrupts are shared between the AHCI and NVMe devices. For simplicity, + * we only support the legacy interrupt here, although MSI support + * could potentially be added later. + */ + +#define MODULE_NAME "intel-nvme-remap" + +#include +#include +#include +#include +#include + +#define AHCI_PCI_BAR_STANDARD 5 + +struct nvme_remap_dev { + struct pci_dev *dev; /* AHCI device */ + struct pci_bus *bus; /* our fake PCI bus */ + struct pci_sysdata sysdata; + int irq_base; /* our fake interrupts */ + + /* + * When we detect an all-ones write to a BAR register, this flag + * is set, so that we return the BAR size on the next read (a + * standard PCI behaviour). + * This includes the assumption that an all-ones BAR write is + * immediately followed by a read of the same register. + */ + bool bar_sizing; + + /* + * Resources copied from the AHCI device, to be regarded as + * resources on our fake bus. + */ + struct resource ahci_resources[PCI_NUM_RESOURCES]; + + /* Resources corresponding to the NVMe devices. */ + struct resource remapped_dev_mem[AHCI_MAX_REMAP]; + + /* Number of remapped NVMe devices found. 
*/ + int num_remapped_devices; +}; + +static inline struct nvme_remap_dev *nrdev_from_bus(struct pci_bus *bus) +{ + return container_of(bus->sysdata, struct nvme_remap_dev, sysdata); +} + + +/******** PCI configuration space **********/ + +/* + * Helper macros for tweaking returned contents of PCI configuration space. + * + * value contains len bytes of data read from reg. + * If fixup_reg is included in that range, fix up the contents of that + * register to fixed_value. + */ +#define NR_FIX8(fixup_reg, fixed_value) do { \ + if (reg <= fixup_reg && fixup_reg < reg + len) \ + ((u8 *) value)[fixup_reg - reg] = (u8) (fixed_value); \ + } while (0) + +#define NR_FIX16(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + } while (0) + +#define NR_FIX24(fixup_reg, fixed_value) do { \ + NR_FIX8(fixup_reg, fixed_value); \ + NR_FIX8(fixup_reg + 1, fixed_value >> 8); \ + NR_FIX8(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +#define NR_FIX32(fixup_reg, fixed_value) do { \ + NR_FIX16(fixup_reg, (u16) fixed_value); \ + NR_FIX16(fixup_reg + 2, fixed_value >> 16); \ + } while (0) + +/* + * Read PCI config space of the slot 0 (AHCI) device. + * We pass through the read request to the underlying device, but + * tweak the results in some cases. + */ +static int nvme_remap_pci_read_slot0(struct pci_bus *bus, int reg, + int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + int ret; + + ret = ahci_dev_bus->ops->read(ahci_dev_bus, nrdev->dev->devfn, + reg, len, value); + if (ret) + return ret; + + /* + * Adjust the device class, to prevent this driver from attempting to + * additionally probe the device we're simulating here. + */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_SATA_AHCI); + + /* + * Unset interrupt pin, otherwise ACPI tries to find routing + * info for our virtual IRQ, fails, and complains. 
+ */ + NR_FIX8(PCI_INTERRUPT_PIN, 0); + + /* + * Truncate the AHCI BAR to not include the region that covers the + * hidden devices. This will cause the ahci driver to successfully + * probe the new device (instead of handing it over to this driver). + */ + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_5, ~(SZ_16K - 1)); + nrdev->bar_sizing = false; + } + + return PCIBIOS_SUCCESSFUL; +} + +/* + * Read PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we provide a minimal, + * fake config space instead. + */ +static int nvme_remap_pci_read_remapped(struct pci_bus *bus, unsigned int port, + int reg, int len, u32 *value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct resource *remapped_mem; + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + *value = 0; + remapped_mem = &nrdev->remapped_dev_mem[port - 1]; + + /* Set a Vendor ID, otherwise Linux assumes no device is present */ + NR_FIX16(PCI_VENDOR_ID, PCI_VENDOR_ID_INTEL); + + /* Always appear on & bus mastering */ + NR_FIX16(PCI_COMMAND, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* Set class so that nvme driver probes us */ + NR_FIX24(PCI_CLASS_PROG, PCI_CLASS_STORAGE_EXPRESS); + + if (nrdev->bar_sizing) { + NR_FIX32(PCI_BASE_ADDRESS_0, + ~(resource_size(remapped_mem) - 1)); + nrdev->bar_sizing = false; + } else { + resource_size_t mem_start = remapped_mem->start; + + mem_start |= PCI_BASE_ADDRESS_MEM_TYPE_64; + NR_FIX32(PCI_BASE_ADDRESS_0, mem_start); + mem_start >>= 32; + NR_FIX32(PCI_BASE_ADDRESS_1, mem_start); + } + + return PCIBIOS_SUCCESSFUL; +} + +/* Read PCI configuration space.
*/ +static int nvme_remap_pci_read(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_read_slot0(bus, reg, len, value); + else + return nvme_remap_pci_read_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +/* + * Write PCI config space of the slot 0 (AHCI) device. + * Apart from the special case of BAR sizing, we disable all writes. + * Otherwise, the ahci driver could make changes (e.g. unset PCI bus master) + * that would affect the operation of the NVMe devices. + */ +static int nvme_remap_pci_write_slot0(struct pci_bus *bus, int reg, + int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + struct pci_bus *ahci_dev_bus = nrdev->dev->bus; + + if (reg >= PCI_BASE_ADDRESS_0 && reg <= PCI_BASE_ADDRESS_5) { + /* + * Writing all-ones to a BAR means that the size of the + * memory region is being checked. Flag this so that we can + * reply with an appropriate size on the next read. + */ + if (value == ~0) + nrdev->bar_sizing = true; + + return ahci_dev_bus->ops->write(ahci_dev_bus, + nrdev->dev->devfn, + reg, len, value); + } + + return PCIBIOS_SET_FAILED; +} + +/* + * Write PCI config space of a remapped device. + * Since the original PCI config space is inaccessible, we reject all + * writes, except for the special case of BAR probing. + */ +static int nvme_remap_pci_write_remapped(struct pci_bus *bus, + unsigned int port, + int reg, int len, u32 value) +{ + struct nvme_remap_dev *nrdev = nrdev_from_bus(bus); + + if (port > nrdev->num_remapped_devices) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Writing all-ones to a BAR means that the size of the memory + * region is being checked. Flag this so that we can reply with + * an appropriate size on the next read. 
+ */ + if (value == ~0 && reg >= PCI_BASE_ADDRESS_0 + && reg <= PCI_BASE_ADDRESS_5) { + nrdev->bar_sizing = true; + return PCIBIOS_SUCCESSFUL; + } + + return PCIBIOS_SET_FAILED; +} + +/* Write PCI configuration space. */ +static int nvme_remap_pci_write(struct pci_bus *bus, unsigned int devfn, + int reg, int len, u32 value) +{ + if (PCI_SLOT(devfn) == 0) + return nvme_remap_pci_write_slot0(bus, reg, len, value); + else + return nvme_remap_pci_write_remapped(bus, PCI_SLOT(devfn), + reg, len, value); +} + +static struct pci_ops nvme_remap_pci_ops = { + .read = nvme_remap_pci_read, + .write = nvme_remap_pci_write, +}; + + +/******** Initialization & exit **********/ + +/* + * Find a PCI domain ID to use for our fake bus. + * Start at 0x10000 to not clash with ACPI _SEG domains (16 bits). + */ +static int find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + + return domain + 1; +} + +static int find_remapped_devices(struct nvme_remap_dev *nrdev, + struct list_head *resources) +{ + void __iomem *mmio; + int i, count = 0; + u32 cap; + + mmio = pcim_iomap(nrdev->dev, AHCI_PCI_BAR_STANDARD, + pci_resource_len(nrdev->dev, + AHCI_PCI_BAR_STANDARD)); + if (!mmio) + return -ENODEV; + + /* Check if this device might have remapped nvme devices. 
*/ + if (pci_resource_len(nrdev->dev, AHCI_PCI_BAR_STANDARD) < SZ_512K || + !(readl(mmio + AHCI_VSCAP) & 1)) + return -ENODEV; + + cap = readq(mmio + AHCI_REMAP_CAP); + for (i = AHCI_MAX_REMAP-1; i >= 0; i--) { + struct resource *remapped_mem; + + if ((cap & (1 << i)) == 0) + continue; + if (readl(mmio + ahci_remap_dcc(i)) + != PCI_CLASS_STORAGE_EXPRESS) + continue; + + /* We've found a remapped device */ + remapped_mem = &nrdev->remapped_dev_mem[count++]; + remapped_mem->start = + pci_resource_start(nrdev->dev, AHCI_PCI_BAR_STANDARD) + + ahci_remap_base(i); + remapped_mem->end = remapped_mem->start + + AHCI_REMAP_N_SIZE - 1; + remapped_mem->flags = IORESOURCE_MEM | IORESOURCE_PCI_FIXED; + pci_add_resource(resources, remapped_mem); + } + + pcim_iounmap(nrdev->dev, mmio); + + if (count == 0) + return -ENODEV; + + nrdev->num_remapped_devices = count; + dev_info(&nrdev->dev->dev, "Found %d remapped NVMe devices\n", + nrdev->num_remapped_devices); + return 0; +} + +static void nvme_remap_remove_root_bus(void *data) +{ + struct pci_bus *bus = data; + + pci_stop_root_bus(bus); + pci_remove_root_bus(bus); +} + +static int nvme_remap_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct nvme_remap_dev *nrdev; + LIST_HEAD(resources); + int i; + int ret; + struct pci_dev *child; + + nrdev = devm_kzalloc(&dev->dev, sizeof(*nrdev), GFP_KERNEL); + nrdev->sysdata.domain = find_free_domain(); + nrdev->sysdata.nvme_remap_dev = dev; + nrdev->dev = dev; + pci_set_drvdata(dev, nrdev); + + ret = pcim_enable_device(dev); + if (ret < 0) + return ret; + + pci_set_master(dev); + + ret = find_remapped_devices(nrdev, &resources); + if (ret) + return ret; + + /* Add resources from the original AHCI device */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &dev->resource[i]; + + if (res->start) { + struct resource *nr_res = &nrdev->ahci_resources[i]; + + nr_res->start = res->start; + nr_res->end = res->end; + nr_res->flags = res->flags; + 
pci_add_resource(&resources, nr_res); + } + } + + /* Create virtual interrupts */ + nrdev->irq_base = devm_irq_alloc_descs(&dev->dev, -1, 0, + nrdev->num_remapped_devices + 1, + 0); + if (nrdev->irq_base < 0) + return nrdev->irq_base; + + /* Create and populate PCI bus */ + nrdev->bus = pci_create_root_bus(&dev->dev, 0, &nvme_remap_pci_ops, + &nrdev->sysdata, &resources); + if (!nrdev->bus) + return -ENODEV; + + if (devm_add_action_or_reset(&dev->dev, nvme_remap_remove_root_bus, + nrdev->bus)) + return -ENOMEM; + + /* We don't support sharing MSI interrupts between these devices */ + nrdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI; + + pci_scan_child_bus(nrdev->bus); + + list_for_each_entry(child, &nrdev->bus->devices, bus_list) { + /* + * Prevent PCI core from trying to move memory BARs around. + * The hidden NVMe devices are at fixed locations. + */ + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + struct resource *res = &child->resource[i]; + + if (res->flags & IORESOURCE_MEM) + res->flags |= IORESOURCE_PCI_FIXED; + } + + /* Share the legacy IRQ between all devices */ + child->irq = dev->irq; + } + + pci_assign_unassigned_bus_resources(nrdev->bus); + pci_bus_add_devices(nrdev->bus); + + return 0; +} + +static const struct pci_device_id nvme_remap_ids[] = { + /* + * Match all Intel RAID controllers. + * + * There's overlap here with the set of devices detected by the ahci + * driver, but ahci will only successfully probe when there + * *aren't* any remapped NVMe devices, and this driver will only + * successfully probe when there *are* remapped NVMe devices that + * need handling. 
+ */ + { + PCI_VDEVICE(INTEL, PCI_ANY_ID), + .class = PCI_CLASS_STORAGE_RAID << 8, + .class_mask = 0xffffff00, + }, + {0,} +}; +MODULE_DEVICE_TABLE(pci, nvme_remap_ids); + +static struct pci_driver nvme_remap_drv = { + .name = MODULE_NAME, + .id_table = nvme_remap_ids, + .probe = nvme_remap_probe, +}; +module_pci_driver(nvme_remap_drv); + +MODULE_AUTHOR("Daniel Drake "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 18e11b9a7f4647..3d778d8b018756 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -540,7 +540,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); /* Assert PERST# before setting up the clock */ - gpiod_set_value(reset, 1); + gpiod_set_value_cansleep(reset, 1); ret = apple_pcie_setup_refclk(pcie, port); if (ret < 0) @@ -551,7 +551,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, /* Deassert PERST# */ rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); - gpiod_set_value(reset, 0); + gpiod_set_value_cansleep(reset, 0); /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ msleep(100); diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 14954f43e5e9af..5864a20323f21a 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -319,11 +319,12 @@ static const char * const rockchip_pci_pm_rsts[] = { "aclk", }; +/* NOTE: Do not reorder the deassert sequence of the following reset pins */ static const char * const rockchip_pci_core_rsts[] = { - "mgmt-sticky", - "core", - "mgmt", "pipe", + "mgmt", + "core", + "mgmt-sticky", }; struct rockchip_pcie { diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 394395c7f8decf..577a9e490115c9 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -236,12 +236,13 
@@ void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar, } dev = epc->dev.parent; - dma_free_coherent(dev, epf_bar[bar].size, addr, + dma_free_coherent(dev, epf_bar[bar].aligned_size, addr, epf_bar[bar].phys_addr); epf_bar[bar].phys_addr = 0; epf_bar[bar].addr = NULL; epf_bar[bar].size = 0; + epf_bar[bar].aligned_size = 0; epf_bar[bar].barno = 0; epf_bar[bar].flags = 0; } @@ -264,7 +265,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, enum pci_epc_interface_type type) { u64 bar_fixed_size = epc_features->bar[bar].fixed_size; - size_t align = epc_features->align; + size_t aligned_size, align = epc_features->align; struct pci_epf_bar *epf_bar; dma_addr_t phys_addr; struct pci_epc *epc; @@ -285,12 +286,18 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, return NULL; } size = bar_fixed_size; + } else { + /* BAR size must be power of two */ + size = roundup_pow_of_two(size); } - if (align) - size = ALIGN(size, align); - else - size = roundup_pow_of_two(size); + /* + * Allocate enough memory to accommodate the iATU alignment + * requirement. In most cases, this will be the same as .size but + * it might be different if, for example, the fixed size of a BAR + * is smaller than align. + */ + aligned_size = align ? 
ALIGN(size, align) : size; if (type == PRIMARY_INTERFACE) { epc = epf->epc; @@ -301,7 +308,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, } dev = epc->dev.parent; - space = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL); + space = dma_alloc_coherent(dev, aligned_size, &phys_addr, GFP_KERNEL); if (!space) { dev_err(dev, "failed to allocate mem space\n"); return NULL; @@ -310,6 +317,7 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar, epf_bar[bar].phys_addr = phys_addr; epf_bar[bar].addr = space; epf_bar[bar].size = size; + epf_bar[bar].aligned_size = aligned_size; epf_bar[bar].barno = bar; if (upper_32_bits(size) || epc_features->bar[bar].only_64bit) epf_bar[bar].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index d30f1316c98e2c..d7fc3bc039643c 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -492,6 +492,75 @@ void pci_hp_destroy(struct hotplug_slot *slot) } EXPORT_SYMBOL_GPL(pci_hp_destroy); +static DECLARE_WAIT_QUEUE_HEAD(pci_hp_link_change_wq); + +/** + * pci_hp_ignore_link_change - begin code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the beginning of a code section causing spurious link changes on the + * Secondary Bus of @pdev, e.g. as a side effect of a Secondary Bus Reset, + * D3cold transition, firmware update or FPGA reconfiguration. + * + * Hotplug drivers can thus check whether such a code section is executing + * concurrently, await it with pci_hp_spurious_link_change() and ignore the + * resulting link change events. + * + * Must be paired with pci_hp_unignore_link_change(). May be called both + * from the PCI core and from Endpoint drivers. May be called for bridges + * which are not hotplug-capable, in which case it has no effect because + * no hotplug driver is bound to the bridge. 
+ */ +void pci_hp_ignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + smp_mb__after_atomic(); /* pairs with implied barrier of wait_event() */ +} + +/** + * pci_hp_unignore_link_change - end code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the end of a code section causing spurious link changes on the + * Secondary Bus of @pdev. Must be paired with pci_hp_ignore_link_change(). + */ +void pci_hp_unignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGED, &pdev->priv_flags); + mb(); /* ensure pci_hp_spurious_link_change() sees either bit set */ + clear_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + wake_up_all(&pci_hp_link_change_wq); +} + +/** + * pci_hp_spurious_link_change - check for spurious link changes + * @pdev: PCI hotplug bridge + * + * Check whether a code section is executing concurrently which is causing + * spurious link changes on the Secondary Bus of @pdev. Await the end of the + * code section if so. + * + * May be called by hotplug drivers to check whether a link change is spurious + * and can be ignored. + * + * Because a genuine link change may have occurred in-between a spurious link + * change and the invocation of this function, hotplug drivers should perform + * sanity checks such as retrieving the current link state and bringing down + * the slot if the link is down. + * + * Return: %true if such a code section has been executing concurrently, + * otherwise %false. Also return %true if such a code section has not been + * executing concurrently, but at least once since the last invocation of this + * function. 
+ */ +bool pci_hp_spurious_link_change(struct pci_dev *pdev) +{ + wait_event(pci_hp_link_change_wq, + !test_bit(PCI_LINK_CHANGING, &pdev->priv_flags)); + + return test_and_clear_bit(PCI_LINK_CHANGED, &pdev->priv_flags); +} + static int __init pci_hotplug_init(void) { int result; diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 273dd8c66f4eff..debc79b0adfb2c 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -187,6 +187,7 @@ int pciehp_card_present(struct controller *ctrl); int pciehp_card_present_or_link_active(struct controller *ctrl); int pciehp_check_link_status(struct controller *ctrl); int pciehp_check_link_active(struct controller *ctrl); +bool pciehp_device_replaced(struct controller *ctrl); void pciehp_release_ctrl(struct controller *ctrl); int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 997841c6989359..f59baa91297099 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -284,35 +284,6 @@ static int pciehp_suspend(struct pcie_device *dev) return 0; } -static bool pciehp_device_replaced(struct controller *ctrl) -{ - struct pci_dev *pdev __free(pci_dev_put) = NULL; - u32 reg; - - if (pci_dev_is_disconnected(ctrl->pcie->port)) - return false; - - pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); - if (!pdev) - return true; - - if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || - reg != (pdev->vendor | (pdev->device << 16)) || - pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || - reg != (pdev->revision | (pdev->class << 8))) - return true; - - if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && - (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || - reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) - return true; - - if (pci_get_dsn(pdev) != ctrl->dsn) - return true; - - return false; -} - static int
pciehp_resume_noirq(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 8a09fb6083e276..ebd342bda235d4 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -563,20 +563,50 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } -static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq) +bool pciehp_device_replaced(struct controller *ctrl) +{ + struct pci_dev *pdev __free(pci_dev_put) = NULL; + u32 reg; + + if (pci_dev_is_disconnected(ctrl->pcie->port)) + return false; + + pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); + if (!pdev) + return true; + + if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || + reg != (pdev->vendor | (pdev->device << 16)) || + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || + reg != (pdev->revision | (pdev->class << 8))) + return true; + + if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || + reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) + return true; + + if (pci_get_dsn(pdev) != ctrl->dsn) + return true; + + return false; +} + +static void pciehp_ignore_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. * Could be several if DPC triggered multiple times consecutively. + * Also ignore link changes caused by Secondary Bus Reset, etc.
*/ synchronize_hardirq(irq); - atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_DLLSC); - ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", - slot_name(ctrl)); + ignored_events); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored\n", slot_name(ctrl)); /* * If the link is unexpectedly down after successful recovery, @@ -584,8 +614,8 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * Synthesize it to ensure that it is acted on. */ down_read_nested(&ctrl->reset_lock, ctrl->depth); - if (!pciehp_check_link_active(ctrl)) - pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + if (!pciehp_check_link_active(ctrl) || pciehp_device_replaced(ctrl)) + pciehp_request(ctrl, ignored_events); up_read(&ctrl->reset_lock); } @@ -732,12 +762,19 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) /* * Ignore Link Down/Up events caused by Downstream Port Containment - * if recovery from the error succeeded. + * if recovery succeeded, or caused by Secondary Bus Reset, + * suspend to D3cold, firmware update, FPGA reconfiguration, etc. 
*/ - if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + if ((events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) && + (pci_dpc_recovered(pdev) || pci_hp_spurious_link_change(pdev)) && ctrl->state == ON_STATE) { - events &= ~PCI_EXP_SLTSTA_DLLSC; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; + + if (!ctrl->inband_presence_disabled) + ignored_events |= events & PCI_EXP_SLTSTA_PDC; + + events &= ~ignored_events; + pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); } /* @@ -902,7 +939,6 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct controller *ctrl = to_ctrl(hotplug_slot); struct pci_dev *pdev = ctrl_dev(ctrl); - u16 stat_mask = 0, ctrl_mask = 0; int rc; if (probe) @@ -910,23 +946,11 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) down_write_nested(&ctrl->reset_lock, ctrl->depth); - if (!ATTN_BUTTN(ctrl)) { - ctrl_mask |= PCI_EXP_SLTCTL_PDCE; - stat_mask |= PCI_EXP_SLTSTA_PDC; - } - ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE; - stat_mask |= PCI_EXP_SLTSTA_DLLSC; - - pcie_write_cmd(ctrl, 0, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0); + pci_hp_ignore_link_change(pdev); rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port); - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); + pci_hp_unignore_link_change(pdev); up_write(&ctrl->reset_lock); return rc; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 055518ee354dc9..e9e9aaa91770ae 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -59,7 +59,6 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pdev = pci_get_slot(zdev->zbus->bus, 
zdev->devfn); if (pdev && pci_num_vf(pdev)) { - pci_dev_put(pdev); rc = -EBUSY; goto out; } diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6569ba3577fe63..8b884878861842 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -615,6 +615,9 @@ void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) void __iomem *addr = pci_msix_desc_addr(desc); desc->pci.msi_attrib.can_mask = 1; + /* Workaround for SUN NIU insanity, which requires write before read */ + if (dev->dev_flags & PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST) + writel(0, addr + PCI_MSIX_ENTRY_DATA); desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); } } diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index af370628e58393..b78e0e41732445 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,24 +1676,19 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) { - kfree(ri); - return NULL; - } + if (!root_ops) + goto free_ri; ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) { - kfree(ri); - kfree(root_ops); - return NULL; - } + if (!ri->cfg) + goto free_root_ops; root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - return NULL; + goto free_cfg; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1710,6 +1705,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; + +free_cfg: + pci_ecam_free(ri->cfg); +free_root_ops: + kfree(root_ops); +free_ri: + kfree(ri); + return NULL; } void pcibios_add_bus(struct pci_bus *bus) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4d7c9f64ea24ec..4d84ed41248442 100644 --- 
a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4954,7 +4954,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type) delay); if (!pcie_wait_for_link_delay(dev, true, delay)) { /* Did not train, no need to wait any further */ - pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); + pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay); return -ENOTTY; } @@ -5429,8 +5429,6 @@ static bool pci_bus_resettable(struct pci_bus *bus) return false; list_for_each_entry(dev, &bus->devices, bus_list) { - if (!pci_reset_supported(dev)) - return false; if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resettable(dev->subordinate))) return false; @@ -5507,8 +5505,6 @@ static bool pci_slot_resettable(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - if (!pci_reset_supported(dev)) - return false; if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || (dev->subordinate && !pci_bus_resettable(dev->subordinate))) return false; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b81e99cd4b62a3..7db798bdcaaae6 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -227,6 +227,7 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); +bool pci_hp_spurious_link_change(struct pci_dev *pdev); #if defined(CONFIG_SYSFS) && defined(HAVE_PCI_LEGACY) void pci_create_legacy_files(struct pci_bus *bus); @@ -557,6 +558,8 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 +#define PCI_LINK_CHANGED 4 +#define PCI_LINK_CHANGING 5 static inline void pci_dev_assign_added(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index df42f15c98295f..9d85f1b3b76112 100644 --- 
a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -258,40 +258,48 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, void dpc_process_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; - struct aer_err_info info; + struct aer_err_info info = {}; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); - pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); - - pci_info(pdev, "containment event, status:%#06x source:%#06x\n", - status, source); reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN; - ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; - pci_warn(pdev, "%s detected\n", - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ? - "unmasked uncorrectable error" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ? - "ERR_NONFATAL" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? - "ERR_FATAL" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? - "RP PIO error" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? 
- "software trigger" : - "reserved error"); - - /* show RP PIO error detail information */ - if (pdev->dpc_rp_extensions && - reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT && - ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) - dpc_process_rp_pio_error(pdev); - else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && - dpc_get_aer_uncorrect_severity(pdev, &info) && - aer_get_device_error_info(pdev, &info)) { - aer_print_error(pdev, &info); - pci_aer_clear_nonfatal_status(pdev); - pci_aer_clear_fatal_status(pdev); + + switch (reason) { + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR: + pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n", + status); + if (dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(pdev, &info)) { + aer_print_error(pdev, &info); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE: + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE: + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, + &source); + pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n", + status, + (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? + "ERR_FATAL" : "ERR_NONFATAL", + pci_domain_nr(pdev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source)); + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT: + ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; + pci_warn(pdev, "containment event, status:%#06x: %s detected\n", + status, + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? + "RP PIO error" : + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? 
+ "software trigger" : + "reserved error"); + /* show RP PIO error detail information */ + if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO && + pdev->dpc_rp_extensions) + dpc_process_rp_pio_error(pdev); + break; } } diff --git a/drivers/pci/pwrctrl/core.c b/drivers/pci/pwrctrl/core.c index 9cc7e2b7f2b560..6bdbfed584d6d7 100644 --- a/drivers/pci/pwrctrl/core.c +++ b/drivers/pci/pwrctrl/core.c @@ -101,6 +101,8 @@ EXPORT_SYMBOL_GPL(pci_pwrctrl_device_set_ready); */ void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl) { + cancel_work_sync(&pwrctrl->work); + /* * We don't have to delete the link here. Typically, this function * is only called when the power control device is being detached. If diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8d610c17e0f2f0..4822a23dd357d4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1990,12 +1990,12 @@ static void quirk_huawei_pcie_sva(struct pci_dev *pdev) device_create_managed_software_node(&pdev->dev, properties, NULL)) pci_warn(pdev, "could not add stall property"); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_HUAWEI, 0xa259, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa250, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa251, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa255, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa256, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa258, quirk_huawei_pcie_sva); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0xa259, 
quirk_huawei_pcie_sva); /* * It's possible for the MSI to get corrupted if SHPC and ACPI are used @@ -3747,6 +3747,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; } +static bool acs_on_downstream; +static bool acs_on_multifunction; + +#define NUM_ACS_IDS 16 +struct acs_on_id { + unsigned short vendor; + unsigned short device; +}; +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; +static u8 max_acs_id; + +static __init int pcie_acs_override_setup(char *p) +{ + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "downstream", 10)) + acs_on_downstream = true; + if (!strncmp(p, "multifunction", 13)) + acs_on_multifunction = true; + if (!strncmp(p, "id:", 3)) { + char opt[5]; + int ret; + long val; + + if (max_acs_id >= NUM_ACS_IDS - 1) { + pr_warn("Out of PCIe ACS override slots (%d)\n", + NUM_ACS_IDS); + goto next; + } + + p += 3; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].vendor = val; + + p += strcspn(p, ":"); + if (*p != ':') { + pr_warn("PCIe ACS invalid ID\n"); + goto next; + } + + p++; + snprintf(opt, 5, "%s", p); + ret = kstrtol(opt, 16, &val); + if (ret) { + pr_warn("PCIe ACS ID parse error %d\n", ret); + goto next; + } + acs_on_ids[max_acs_id].device = val; + max_acs_id++; + } +next: + p += strcspn(p, ","); + if (*p == ',') + p++; + } + + if (acs_on_downstream || acs_on_multifunction || max_acs_id) + pr_warn("Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); + + return 0; +} +early_param("pcie_acs_override", pcie_acs_override_setup); + +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) +{ + int i; + + /* Never override ACS for legacy devices or devices with ACS caps */ + if (!pci_is_pcie(dev) || + pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) + return -ENOTTY; + + for (i = 0; i < max_acs_id; i++) + if 
(acs_on_ids[i].vendor == dev->vendor && + acs_on_ids[i].device == dev->device) + return 1; + + switch (pci_pcie_type(dev)) { + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + if (acs_on_downstream) + return 1; + break; + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (acs_on_multifunction && dev->multifunction) + return 1; + } + + return -ENOTTY; +} /* * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be * prevented for those affected devices. @@ -5171,6 +5271,7 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, /* Wangxun nics */ { PCI_VENDOR_ID_WANGXUN, PCI_ANY_ID, pci_quirk_wangxun_nic_acs }, + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, { 0 } }; diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 54d6f4fa3ce166..e994c546422c9e 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -187,6 +187,9 @@ static void pdev_sort_resources(struct pci_dev *dev, struct list_head *head) panic("%s: kzalloc() failed!\n", __func__); tmp->res = r; tmp->dev = dev; + tmp->start = r->start; + tmp->end = r->end; + tmp->flags = r->flags; /* Fallback is smallest one or list is empty */ n = head; @@ -545,6 +548,7 @@ static void __assign_resources_sorted(struct list_head *head, pci_dbg(dev, "%s %pR: releasing\n", res_name, res); release_resource(res); + restore_dev_resource(dev_res); } /* Restore start/end/flags from saved list */ list_for_each_entry(save_res, &save_head, list) diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c index 07446d784a1a64..c1e755c356a333 100644 --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c @@ -511,7 +511,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev) fmt_attr_fill(pmu->info.hw_info->fmt_attr); - pmu->cpu = smp_processor_id(); + pmu->cpu = 
raw_smp_processor_id(); name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME); if (!name) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index d4fe30ff225b6a..403850b1040d3d 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -727,8 +727,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, if ((chan == 5 && cmn->rsp_vc_num < 2) || (chan == 6 && cmn->dat_vc_num < 2) || - (chan == 7 && cmn->snp_vc_num < 2) || - (chan == 8 && cmn->req_vc_num < 2)) + (chan == 7 && cmn->req_vc_num < 2) || + (chan == 8 && cmn->snp_vc_num < 2)) return 0; } @@ -882,8 +882,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, _CMN_EVENT_XP(pub_##_name, (_event) | (4 << 5)), \ _CMN_EVENT_XP(rsp2_##_name, (_event) | (5 << 5)), \ _CMN_EVENT_XP(dat2_##_name, (_event) | (6 << 5)), \ - _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)), \ - _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5)) + _CMN_EVENT_XP(req2_##_name, (_event) | (7 << 5)), \ + _CMN_EVENT_XP(snp2_##_name, (_event) | (8 << 5)) #define CMN_EVENT_XP_DAT(_name, _event) \ _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)), \ @@ -2558,6 +2558,7 @@ static int arm_cmn_probe(struct platform_device *pdev) cmn->dev = &pdev->dev; cmn->part = (unsigned long)device_get_match_data(cmn->dev); + cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); platform_set_drvdata(pdev, cmn); if (cmn->part == PART_CMN600 && has_acpi_companion(cmn->dev)) { @@ -2585,7 +2586,6 @@ static int arm_cmn_probe(struct platform_device *pdev) if (err) return err; - cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev)); cmn->pmu = (struct pmu) { .module = THIS_MODULE, .parent = cmn->dev, @@ -2651,6 +2651,7 @@ static const struct acpi_device_id arm_cmn_acpi_match[] = { { "ARMHC600", PART_CMN600 }, { "ARMHC650" }, { "ARMHC700" }, + { "ARMHC003" }, {} }; MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match); diff --git a/drivers/perf/arm-ni.c b/drivers/perf/arm-ni.c index 
fd7a5e60e96302..de7b6cce4d68a8 100644 --- a/drivers/perf/arm-ni.c +++ b/drivers/perf/arm-ni.c @@ -575,6 +575,23 @@ static int arm_ni_init_cd(struct arm_ni *ni, struct arm_ni_node *node, u64 res_s return err; } +static void arm_ni_remove(struct platform_device *pdev) +{ + struct arm_ni *ni = platform_get_drvdata(pdev); + + for (int i = 0; i < ni->num_cds; i++) { + struct arm_ni_cd *cd = ni->cds + i; + + if (!cd->pmu_base) + continue; + + writel_relaxed(0, cd->pmu_base + NI_PMCR); + writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); + perf_pmu_unregister(&cd->pmu); + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); + } +} + static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) { u32 reg = readl_relaxed(base + NI_NODE_TYPE); @@ -643,6 +660,7 @@ static int arm_ni_probe(struct platform_device *pdev) ni->num_cds = num_cds; ni->part = part; ni->id = atomic_fetch_inc(&id); + platform_set_drvdata(pdev, ni); for (int v = 0; v < cfg.num_components; v++) { reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); @@ -656,8 +674,11 @@ static int arm_ni_probe(struct platform_device *pdev) reg = readl_relaxed(pd.base + NI_CHILD_PTR(c)); arm_ni_probe_domain(base + reg, &cd); ret = arm_ni_init_cd(ni, &cd, res->start); - if (ret) + if (ret) { + ni->cds[cd.id].pmu_base = NULL; + arm_ni_remove(pdev); return ret; + } } } } @@ -665,23 +686,6 @@ static int arm_ni_probe(struct platform_device *pdev) return 0; } -static void arm_ni_remove(struct platform_device *pdev) -{ - struct arm_ni *ni = platform_get_drvdata(pdev); - - for (int i = 0; i < ni->num_cds; i++) { - struct arm_ni_cd *cd = ni->cds + i; - - if (!cd->pmu_base) - continue; - - writel_relaxed(0, cd->pmu_base + NI_PMCR); - writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); - perf_pmu_unregister(&cd->pmu); - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); - } -} - #ifdef CONFIG_OF static const struct of_device_id arm_ni_of_match[] = { { .compatible = "arm,ni-700" 
}, diff --git a/drivers/phy/phy-can-transceiver.c b/drivers/phy/phy-can-transceiver.c index 2bec70615449f9..f59caff4b3d4c2 100644 --- a/drivers/phy/phy-can-transceiver.c +++ b/drivers/phy/phy-can-transceiver.c @@ -93,6 +93,16 @@ static const struct of_device_id can_transceiver_phy_ids[] = { }; MODULE_DEVICE_TABLE(of, can_transceiver_phy_ids); +/* Temporary wrapper until the multiplexer subsystem supports optional muxes */ +static inline struct mux_state * +devm_mux_state_get_optional(struct device *dev, const char *mux_name) +{ + if (!of_property_present(dev->of_node, "mux-states")) + return NULL; + + return devm_mux_state_get(dev, mux_name); +} + static int can_transceiver_phy_probe(struct platform_device *pdev) { struct phy_provider *phy_provider; @@ -114,13 +124,11 @@ static int can_transceiver_phy_probe(struct platform_device *pdev) match = of_match_node(can_transceiver_phy_ids, pdev->dev.of_node); drvdata = match->data; - mux_state = devm_mux_state_get(dev, NULL); - if (IS_ERR(mux_state)) { - if (PTR_ERR(mux_state) == -EPROBE_DEFER) - return PTR_ERR(mux_state); - } else { - can_transceiver_phy->mux_state = mux_state; - } + mux_state = devm_mux_state_get_optional(dev, NULL); + if (IS_ERR(mux_state)) + return PTR_ERR(mux_state); + + can_transceiver_phy->mux_state = mux_state; phy = devm_phy_create(dev, dev->of_node, &can_transceiver_phy_ops); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c index 45b3b792696e9e..b33e2e2b5014d3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c @@ -1754,7 +1754,8 @@ static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg qmp_ufs_init_all(qmp, &cfg->tbls_hs_overlay[i]); } - qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); + if (qmp->mode == PHY_MODE_UFS_HS_B) + qmp_ufs_init_all(qmp, &cfg->tbls_hs_b); } static int qmp_ufs_com_init(struct qmp_ufs *qmp) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c 
b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 78772157045752..ed646a7e705ba3 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -2106,12 +2106,16 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np, int index, bool exclusive) { struct resource res; + void __iomem *mem; if (!exclusive) { if (of_address_to_resource(np, index, &res)) return IOMEM_ERR_PTR(-EINVAL); - return devm_ioremap(dev, res.start, resource_size(&res)); + mem = devm_ioremap(dev, res.start, resource_size(&res)); + if (!mem) + return IOMEM_ERR_PTR(-ENOMEM); + return mem; } return devm_of_iomap(dev, np, index, NULL); diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 1f5f7df14d5a2f..49c37c53b38e70 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -151,21 +151,6 @@ static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = { QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F), }; -static const struct qusb2_phy_init_tbl ipq5424_init_tbl[] = { - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0x00), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0x53), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xc3), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00), - QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80), - QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9f), -}; - static const struct qusb2_phy_init_tbl qcs615_init_tbl[] = { QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xc8), QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xb3), @@ -359,16 +344,6 @@ static const struct qusb2_phy_cfg ipq6018_phy_cfg = { .autoresume_en = BIT(0), }; -static const struct qusb2_phy_cfg 
ipq5424_phy_cfg = { - .tbl = ipq5424_init_tbl, - .tbl_num = ARRAY_SIZE(ipq5424_init_tbl), - .regs = ipq6018_regs_layout, - - .disable_ctrl = POWER_DOWN, - .mask_core_ready = PLL_LOCKED, - .autoresume_en = BIT(0), -}; - static const struct qusb2_phy_cfg qcs615_phy_cfg = { .tbl = qcs615_init_tbl, .tbl_num = ARRAY_SIZE(qcs615_init_tbl), @@ -955,7 +930,7 @@ static const struct phy_ops qusb2_phy_gen_ops = { static const struct of_device_id qusb2_phy_of_match_table[] = { { .compatible = "qcom,ipq5424-qusb2-phy", - .data = &ipq5424_phy_cfg, + .data = &ipq6018_phy_cfg, }, { .compatible = "qcom,ipq6018-qusb2-phy", .data = &ipq6018_phy_cfg, diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index 775f4f973a6cc2..9fdf17e0848a28 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -9,6 +9,7 @@ * Copyright (C) 2014 Cogent Embedded, Inc. */ +#include #include #include #include @@ -107,7 +108,6 @@ struct rcar_gen3_phy { struct rcar_gen3_chan *ch; u32 int_enable_bits; bool initialized; - bool otg_initialized; bool powered; }; @@ -119,9 +119,8 @@ struct rcar_gen3_chan { struct regulator *vbus; struct reset_control *rstc; struct work_struct work; - struct mutex lock; /* protects rphys[...].powered */ + spinlock_t lock; /* protects access to hardware and driver data structure. 
*/ enum usb_dr_mode dr_mode; - int irq; u32 obint_enable_bits; bool extcon_host; bool is_otg_channel; @@ -320,16 +319,15 @@ static bool rcar_gen3_is_any_rphy_initialized(struct rcar_gen3_chan *ch) return false; } -static bool rcar_gen3_needs_init_otg(struct rcar_gen3_chan *ch) +static bool rcar_gen3_is_any_otg_rphy_initialized(struct rcar_gen3_chan *ch) { - int i; - - for (i = 0; i < NUM_OF_PHYS; i++) { - if (ch->rphys[i].otg_initialized) - return false; + for (enum rcar_gen3_phy_index i = PHY_INDEX_BOTH_HC; i <= PHY_INDEX_EHCI; + i++) { + if (ch->rphys[i].initialized) + return true; } - return true; + return false; } static bool rcar_gen3_are_all_rphys_power_off(struct rcar_gen3_chan *ch) @@ -351,7 +349,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, bool is_b_device; enum phy_mode cur_mode, new_mode; - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + guard(spinlock_irqsave)(&ch->lock); + + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; if (sysfs_streq(buf, "host")) @@ -389,7 +389,7 @@ static ssize_t role_show(struct device *dev, struct device_attribute *attr, { struct rcar_gen3_chan *ch = dev_get_drvdata(dev); - if (!ch->is_otg_channel || !rcar_gen3_is_any_rphy_initialized(ch)) + if (!ch->is_otg_channel || !rcar_gen3_is_any_otg_rphy_initialized(ch)) return -EIO; return sprintf(buf, "%s\n", rcar_gen3_is_host(ch) ? 
"host" : @@ -402,6 +402,9 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) void __iomem *usb2_base = ch->base; u32 val; + if (!ch->is_otg_channel || rcar_gen3_is_any_otg_rphy_initialized(ch)) + return; + /* Should not use functions of read-modify-write a register */ val = readl(usb2_base + USB2_LINECTRL1); val = (val & ~USB2_LINECTRL1_DP_RPD) | USB2_LINECTRL1_DPRPD_EN | @@ -415,7 +418,7 @@ static void rcar_gen3_init_otg(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_ADPCTRL); writel(val | USB2_ADPCTRL_IDPULLUP, usb2_base + USB2_ADPCTRL); } - msleep(20); + mdelay(20); writel(0xffffffff, usb2_base + USB2_OBINTSTA); writel(ch->obint_enable_bits, usb2_base + USB2_OBINTEN); @@ -427,16 +430,27 @@ static irqreturn_t rcar_gen3_phy_usb2_irq(int irq, void *_ch) { struct rcar_gen3_chan *ch = _ch; void __iomem *usb2_base = ch->base; - u32 status = readl(usb2_base + USB2_OBINTSTA); + struct device *dev = ch->dev; irqreturn_t ret = IRQ_NONE; + u32 status; + + pm_runtime_get_noresume(dev); + + if (pm_runtime_suspended(dev)) + goto rpm_put; - if (status & ch->obint_enable_bits) { - dev_vdbg(ch->dev, "%s: %08x\n", __func__, status); - writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); - rcar_gen3_device_recognition(ch); - ret = IRQ_HANDLED; + scoped_guard(spinlock, &ch->lock) { + status = readl(usb2_base + USB2_OBINTSTA); + if (status & ch->obint_enable_bits) { + dev_vdbg(dev, "%s: %08x\n", __func__, status); + writel(ch->obint_enable_bits, usb2_base + USB2_OBINTSTA); + rcar_gen3_device_recognition(ch); + ret = IRQ_HANDLED; + } } +rpm_put: + pm_runtime_put_noidle(dev); return ret; } @@ -446,32 +460,23 @@ static int rcar_gen3_phy_usb2_init(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; void __iomem *usb2_base = channel->base; u32 val; - int ret; - if (!rcar_gen3_is_any_rphy_initialized(channel) && channel->irq >= 0) { - INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); - ret = request_irq(channel->irq, rcar_gen3_phy_usb2_irq, - IRQF_SHARED, 
dev_name(channel->dev), channel); - if (ret < 0) { - dev_err(channel->dev, "No irq handler (%d)\n", channel->irq); - return ret; - } - } + guard(spinlock_irqsave)(&channel->lock); /* Initialize USB2 part */ val = readl(usb2_base + USB2_INT_ENABLE); val |= USB2_INT_ENABLE_UCOM_INTEN | rphy->int_enable_bits; writel(val, usb2_base + USB2_INT_ENABLE); - writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); - writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); - - /* Initialize otg part */ - if (channel->is_otg_channel) { - if (rcar_gen3_needs_init_otg(channel)) - rcar_gen3_init_otg(channel); - rphy->otg_initialized = true; + + if (!rcar_gen3_is_any_rphy_initialized(channel)) { + writel(USB2_SPD_RSM_TIMSET_INIT, usb2_base + USB2_SPD_RSM_TIMSET); + writel(USB2_OC_TIMSET_INIT, usb2_base + USB2_OC_TIMSET); } + /* Initialize otg part (only if we initialize a PHY with IRQs). */ + if (rphy->int_enable_bits) + rcar_gen3_init_otg(channel); + rphy->initialized = true; return 0; @@ -484,10 +489,9 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) void __iomem *usb2_base = channel->base; u32 val; - rphy->initialized = false; + guard(spinlock_irqsave)(&channel->lock); - if (channel->is_otg_channel) - rphy->otg_initialized = false; + rphy->initialized = false; val = readl(usb2_base + USB2_INT_ENABLE); val &= ~rphy->int_enable_bits; @@ -495,9 +499,6 @@ static int rcar_gen3_phy_usb2_exit(struct phy *p) val &= ~USB2_INT_ENABLE_UCOM_INTEN; writel(val, usb2_base + USB2_INT_ENABLE); - if (channel->irq >= 0 && !rcar_gen3_is_any_rphy_initialized(channel)) - free_irq(channel->irq, channel); - return 0; } @@ -509,16 +510,17 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) u32 val; int ret = 0; - mutex_lock(&channel->lock); - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; - if (channel->vbus) { ret = regulator_enable(channel->vbus); if (ret) - goto out; + return ret; } + guard(spinlock_irqsave)(&channel->lock); + + if 
(!rcar_gen3_are_all_rphys_power_off(channel)) + goto out; + val = readl(usb2_base + USB2_USBCTR); val |= USB2_USBCTR_PLL_RST; writel(val, usb2_base + USB2_USBCTR); @@ -528,7 +530,6 @@ static int rcar_gen3_phy_usb2_power_on(struct phy *p) out: /* The powered flag should be set for any other phys anyway */ rphy->powered = true; - mutex_unlock(&channel->lock); return 0; } @@ -539,18 +540,20 @@ static int rcar_gen3_phy_usb2_power_off(struct phy *p) struct rcar_gen3_chan *channel = rphy->ch; int ret = 0; - mutex_lock(&channel->lock); - rphy->powered = false; + scoped_guard(spinlock_irqsave, &channel->lock) { + rphy->powered = false; - if (!rcar_gen3_are_all_rphys_power_off(channel)) - goto out; + if (rcar_gen3_are_all_rphys_power_off(channel)) { + u32 val = readl(channel->base + USB2_USBCTR); + + val |= USB2_USBCTR_PLL_RST; + writel(val, channel->base + USB2_USBCTR); + } + } if (channel->vbus) ret = regulator_disable(channel->vbus); -out: - mutex_unlock(&channel->lock); - return ret; } @@ -703,7 +706,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct rcar_gen3_chan *channel; struct phy_provider *provider; - int ret = 0, i; + int ret = 0, i, irq; if (!dev->of_node) { dev_err(dev, "This driver needs device tree\n"); @@ -719,8 +722,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) return PTR_ERR(channel->base); channel->obint_enable_bits = USB2_OBINT_BITS; - /* get irq number here and request_irq for OTG in phy_init */ - channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { channel->is_otg_channel = true; @@ -763,7 +764,7 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) if (phy_data->no_adp_ctrl) channel->obint_enable_bits = USB2_OBINT_IDCHG_EN; - mutex_init(&channel->lock); + spin_lock_init(&channel->lock); for (i = 0; i < NUM_OF_PHYS; i++) { channel->rphys[i].phy = 
devm_phy_create(dev, NULL, phy_data->phy_usb2_ops); @@ -789,6 +790,20 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->vbus = NULL; } + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) { + ret = irq; + goto error; + } else if (irq > 0) { + INIT_WORK(&channel->work, rcar_gen3_phy_usb2_work); + ret = devm_request_irq(dev, irq, rcar_gen3_phy_usb2_irq, + IRQF_SHARED, dev_name(dev), channel); + if (ret < 0) { + dev_err(dev, "Failed to request irq (%d)\n", irq); + goto error; + } + } + provider = devm_of_phy_provider_register(dev, rcar_gen3_phy_usb2_xlate); if (IS_ERR(provider)) { dev_err(dev, "Failed to register PHY provider\n"); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c index 08c78c1bafc9a2..28a052e1736651 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c @@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev) return ret; } - clk_prepare_enable(samsung->ref_clk); + ret = clk_prepare_enable(samsung->ref_clk); if (ret) { dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret); clk_disable_unprepare(samsung->pclk); diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c index fe7c0574835636..61db514ce5cfb5 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c @@ -320,6 +320,7 @@ #define LN3_TX_SER_RATE_SEL_HBR2_MASK BIT(3) #define LN3_TX_SER_RATE_SEL_HBR3_MASK BIT(2) +#define HDMI14_MAX_RATE 340000000 #define HDMI20_MAX_RATE 600000000 enum dp_link_rate { @@ -476,6 +477,8 @@ static const struct ropll_config ropll_tmds_cfg[] = { 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, + { 502500, 84, 84, 1, 1, 7, 
1, 1, 1, 1, 1, 1, 1, 11, 1, 4, 5, + 4, 11, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, @@ -1005,9 +1008,7 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, { const struct ropll_config *cfg = NULL; struct ropll_config rc = {0}; - int i; - - hdptx->rate = rate * 100; + int ret, i; for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) if (rate == ropll_tmds_cfg[i].bit_rate) { @@ -1062,7 +1063,11 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN_MASK, FIELD_PREP(PLL_PCG_CLK_EN_MASK, 0x1)); - return rk_hdptx_post_enable_pll(hdptx); + ret = rk_hdptx_post_enable_pll(hdptx); + if (!ret) + hdptx->rate = rate * 100; + + return ret; } static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, @@ -1072,7 +1077,7 @@ static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06); - if (rate >= 3400000) { + if (rate > HDMI14_MAX_RATE / 100) { /* For 1/40 bitrate clk */ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq); } else { diff --git a/drivers/phy/starfive/phy-jh7110-usb.c b/drivers/phy/starfive/phy-jh7110-usb.c index cb5454fbe2c8fa..b505d89860b439 100644 --- a/drivers/phy/starfive/phy-jh7110-usb.c +++ b/drivers/phy/starfive/phy-jh7110-usb.c @@ -18,6 +18,8 @@ #include #define USB_125M_CLK_RATE 125000000 +#define USB_CLK_MODE_OFF 0x0 +#define USB_CLK_MODE_RX_NORMAL_PWR BIT(1) #define USB_LS_KEEPALIVE_OFF 0x4 #define USB_LS_KEEPALIVE_ENABLE BIT(4) @@ -78,6 +80,7 @@ static int jh7110_usb2_phy_init(struct phy *_phy) { struct jh7110_usb2_phy *phy = phy_get_drvdata(_phy); int ret; + unsigned int val; ret = clk_set_rate(phy->usb_125m_clk, USB_125M_CLK_RATE); if (ret) @@ -87,6 +90,10 @@ static int jh7110_usb2_phy_init(struct 
phy *_phy) if (ret) return ret; + val = readl(phy->regs + USB_CLK_MODE_OFF); + val |= USB_CLK_MODE_RX_NORMAL_PWR; + writel(val, phy->regs + USB_CLK_MODE_OFF); + return 0; } diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c index fae6242aa730e0..23a23f2d64e586 100644 --- a/drivers/phy/tegra/xusb-tegra186.c +++ b/drivers/phy/tegra/xusb-tegra186.c @@ -237,6 +237,8 @@ #define DATA0_VAL_PD BIT(1) #define USE_XUSB_AO BIT(4) +#define TEGRA_UTMI_PAD_MAX 4 + #define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \ { \ .name = _name, \ @@ -269,7 +271,7 @@ struct tegra186_xusb_padctl { /* UTMI bias and tracking */ struct clk *usb2_trk_clk; - unsigned int bias_pad_enable; + DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX); /* padctl context */ struct tegra186_xusb_padctl_context context; @@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) u32 value; int err; - mutex_lock(&padctl->lock); - - if (priv->bias_pad_enable++ > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } err = clk_prepare_enable(priv->usb2_trk_clk); if (err < 0) @@ -658,8 +656,6 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl) } else { clk_disable_unprepare(priv->usb2_trk_clk); } - - mutex_unlock(&padctl->lock); } static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) @@ -667,17 +663,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); u32 value; - mutex_lock(&padctl->lock); - - if (WARN_ON(priv->bias_pad_enable == 0)) { - mutex_unlock(&padctl->lock); - return; - } - - if (--priv->bias_pad_enable > 0) { - mutex_unlock(&padctl->lock); + if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX)) return; - } value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1); value |= USB2_PD_TRK; @@ -690,13 +677,13 
@@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl) clk_disable_unprepare(priv->usb2_trk_clk); } - mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_on(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); struct tegra_xusb_usb2_port *port; struct device *dev = padctl->dev; unsigned int index = lane->index; @@ -705,9 +692,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) if (!phy) return; + mutex_lock(&padctl->lock); + if (test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + port = tegra_xusb_find_usb2_port(padctl, index); if (!port) { dev_err(dev, "no port found for USB2 lane %u\n", index); + mutex_unlock(&padctl->lock); return; } @@ -724,18 +718,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy) value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); value &= ~USB2_OTG_PD_DR; padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index)); + + set_bit(index, priv->utmi_pad_enabled); + mutex_unlock(&padctl->lock); } static void tegra186_utmi_pad_power_down(struct phy *phy) { struct tegra_xusb_lane *lane = phy_get_drvdata(phy); struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl); unsigned int index = lane->index; u32 value; if (!phy) return; + mutex_lock(&padctl->lock); + if (!test_bit(index, priv->utmi_pad_enabled)) { + mutex_unlock(&padctl->lock); + return; + } + dev_dbg(padctl->dev, "power down UTMI pad %u\n", index); value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index)); @@ -748,7 +752,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy) udelay(2); + clear_bit(index, priv->utmi_pad_enabled); + tegra186_utmi_bias_pad_power_off(padctl); + + mutex_unlock(&padctl->lock); } static int 
tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl, diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 79d4814d758d5e..c89df95aa6ca98 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -548,16 +548,16 @@ static int tegra_xusb_port_init(struct tegra_xusb_port *port, err = dev_set_name(&port->dev, "%s-%u", name, index); if (err < 0) - goto unregister; + goto put_device; err = device_add(&port->dev); if (err < 0) - goto unregister; + goto put_device; return 0; -unregister: - device_unregister(&port->dev); +put_device: + put_device(&port->dev); return err; } diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index 842a1e6cbfc41a..18de3132854045 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -37,16 +37,16 @@ static inline const struct group_desc *imx_pinctrl_find_group_by_name( struct pinctrl_dev *pctldev, const char *name) { - const struct group_desc *grp = NULL; + const struct group_desc *grp; int i; for (i = 0; i < pctldev->num_groups; i++) { grp = pinctrl_generic_get_group(pctldev, i); if (grp && !strcmp(grp->grp.name, name)) - break; + return grp; } - return grp; + return NULL; } static void imx_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index ced4ee509b5b6d..e235a98ae7ee58 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -22,7 +22,6 @@ #include #include "mtk-eint.h" -#include "pinctrl-mtk-common-v2.h" #define MTK_EINT_EDGE_SENSITIVE 0 #define MTK_EINT_LEVEL_SENSITIVE 1 @@ -449,7 +448,7 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, return -EOPNOTSUPP; virq = irq_find_mapping(eint->domain, eint_num); - eint_offset = (eint_num % 4) * 8; + eint_offset = (idx % 4) * 8; d = irq_get_irq_data(virq); set_offset = (idx / 4) * 4 + 
eint->regs->dbnc_set; @@ -505,10 +504,9 @@ int mtk_eint_find_irq(struct mtk_eint *eint, unsigned long eint_n) } EXPORT_SYMBOL_GPL(mtk_eint_find_irq); -int mtk_eint_do_init(struct mtk_eint *eint) +int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin) { - unsigned int size, i, port, inst = 0; - struct mtk_pinctrl *hw = (struct mtk_pinctrl *)eint->pctl; + unsigned int size, i, port, virq, inst = 0; /* If clients don't assign a specific regs, let's use generic one */ if (!eint->regs) @@ -519,7 +517,15 @@ int mtk_eint_do_init(struct mtk_eint *eint) if (!eint->base_pin_num) return -ENOMEM; - if (eint->nbase == 1) { + if (eint_pin) { + eint->pins = eint_pin; + for (i = 0; i < eint->hw->ap_num; i++) { + inst = eint->pins[i].instance; + if (inst >= eint->nbase) + continue; + eint->base_pin_num[inst]++; + } + } else { size = eint->hw->ap_num * sizeof(struct mtk_eint_pin); eint->pins = devm_kmalloc(eint->dev, size, GFP_KERNEL); if (!eint->pins) @@ -533,16 +539,6 @@ int mtk_eint_do_init(struct mtk_eint *eint) } } - if (hw && hw->soc && hw->soc->eint_pin) { - eint->pins = hw->soc->eint_pin; - for (i = 0; i < eint->hw->ap_num; i++) { - inst = eint->pins[i].instance; - if (inst >= eint->nbase) - continue; - eint->base_pin_num[inst]++; - } - } - eint->pin_list = devm_kmalloc(eint->dev, eint->nbase * sizeof(u16 *), GFP_KERNEL); if (!eint->pin_list) goto err_pin_list; @@ -584,7 +580,7 @@ int mtk_eint_do_init(struct mtk_eint *eint) if (inst >= eint->nbase) continue; eint->pin_list[inst][eint->pins[i].index] = i; - int virq = irq_create_mapping(eint->domain, i); + virq = irq_create_mapping(eint->domain, i); irq_set_chip_and_handler(virq, &mtk_eint_irq_chip, handle_level_irq); irq_set_chip_data(virq, eint); @@ -610,7 +606,7 @@ int mtk_eint_do_init(struct mtk_eint *eint) err_wake_mask: devm_kfree(eint->dev, eint->pin_list); err_pin_list: - if (eint->nbase == 1) + if (!eint_pin) devm_kfree(eint->dev, eint->pins); err_pins: devm_kfree(eint->dev, eint->base_pin_num); 
diff --git a/drivers/pinctrl/mediatek/mtk-eint.h b/drivers/pinctrl/mediatek/mtk-eint.h index f7f58cca0d5e31..fc31a4c0c77bf2 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.h +++ b/drivers/pinctrl/mediatek/mtk-eint.h @@ -66,7 +66,7 @@ struct mtk_eint_xt { struct mtk_eint { struct device *dev; void __iomem **base; - u8 nbase; + int nbase; u16 *base_pin_num; struct irq_domain *domain; int irq; @@ -88,7 +88,7 @@ struct mtk_eint { }; #if IS_ENABLED(CONFIG_EINT_MTK) -int mtk_eint_do_init(struct mtk_eint *eint); +int mtk_eint_do_init(struct mtk_eint *eint, struct mtk_eint_pin *eint_pin); int mtk_eint_do_suspend(struct mtk_eint *eint); int mtk_eint_do_resume(struct mtk_eint *eint); int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_n, @@ -96,7 +96,8 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_n, int mtk_eint_find_irq(struct mtk_eint *eint, unsigned long eint_n); #else -static inline int mtk_eint_do_init(struct mtk_eint *eint) +static inline int mtk_eint_do_init(struct mtk_eint *eint, + struct mtk_eint_pin *eint_pin) { return -EOPNOTSUPP; } diff --git a/drivers/pinctrl/mediatek/pinctrl-airoha.c b/drivers/pinctrl/mediatek/pinctrl-airoha.c index 547a798b71c8ae..5d84a778683d05 100644 --- a/drivers/pinctrl/mediatek/pinctrl-airoha.c +++ b/drivers/pinctrl/mediatek/pinctrl-airoha.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -112,39 +113,19 @@ #define REG_LAN_LED1_MAPPING 0x0280 #define LAN4_LED_MAPPING_MASK GENMASK(18, 16) -#define LAN4_PHY4_LED_MAP BIT(18) -#define LAN4_PHY2_LED_MAP BIT(17) -#define LAN4_PHY1_LED_MAP BIT(16) -#define LAN4_PHY0_LED_MAP 0 -#define LAN4_PHY3_LED_MAP GENMASK(17, 16) +#define LAN4_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN4_LED_MAPPING_MASK, (_n)) #define LAN3_LED_MAPPING_MASK GENMASK(14, 12) -#define LAN3_PHY4_LED_MAP BIT(14) -#define LAN3_PHY2_LED_MAP BIT(13) -#define LAN3_PHY1_LED_MAP BIT(12) -#define LAN3_PHY0_LED_MAP 0 -#define LAN3_PHY3_LED_MAP GENMASK(13, 12) +#define 
LAN3_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN3_LED_MAPPING_MASK, (_n)) #define LAN2_LED_MAPPING_MASK GENMASK(10, 8) -#define LAN2_PHY4_LED_MAP BIT(12) -#define LAN2_PHY2_LED_MAP BIT(11) -#define LAN2_PHY1_LED_MAP BIT(10) -#define LAN2_PHY0_LED_MAP 0 -#define LAN2_PHY3_LED_MAP GENMASK(11, 10) +#define LAN2_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN2_LED_MAPPING_MASK, (_n)) #define LAN1_LED_MAPPING_MASK GENMASK(6, 4) -#define LAN1_PHY4_LED_MAP BIT(6) -#define LAN1_PHY2_LED_MAP BIT(5) -#define LAN1_PHY1_LED_MAP BIT(4) -#define LAN1_PHY0_LED_MAP 0 -#define LAN1_PHY3_LED_MAP GENMASK(5, 4) +#define LAN1_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN1_LED_MAPPING_MASK, (_n)) #define LAN0_LED_MAPPING_MASK GENMASK(2, 0) -#define LAN0_PHY4_LED_MAP BIT(3) -#define LAN0_PHY2_LED_MAP BIT(2) -#define LAN0_PHY1_LED_MAP BIT(1) -#define LAN0_PHY0_LED_MAP 0 -#define LAN0_PHY3_LED_MAP GENMASK(2, 1) +#define LAN0_PHY_LED_MAP(_n) FIELD_PREP_CONST(LAN0_LED_MAPPING_MASK, (_n)) /* CONF */ #define REG_I2C_SDA_E2 0x001c @@ -1476,8 +1457,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1491,8 +1472,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1506,8 +1487,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1521,8 +1502,8 @@ static const struct airoha_pinctrl_func_group phy1_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + 
LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1540,8 +1521,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1555,8 +1536,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1570,8 +1551,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1585,8 +1566,8 @@ static const struct airoha_pinctrl_func_group phy2_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1604,8 +1585,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1619,8 +1600,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1634,8 +1615,8 @@ static const struct airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1649,8 +1630,8 @@ static const struct 
airoha_pinctrl_func_group phy3_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1668,8 +1649,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1683,8 +1664,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1698,8 +1679,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1713,8 +1694,8 @@ static const struct airoha_pinctrl_func_group phy4_led0_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED0_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, @@ -1732,8 +1713,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY1_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1747,8 +1728,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY1_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1762,8 +1743,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - 
LAN3_PHY1_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(0) }, .regmap_size = 2, }, { @@ -1777,8 +1758,8 @@ static const struct airoha_pinctrl_func_group phy1_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY1_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(0) }, .regmap_size = 2, }, @@ -1796,8 +1777,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY2_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1811,8 +1792,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY2_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1826,8 +1807,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY2_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(1) }, .regmap_size = 2, }, { @@ -1841,8 +1822,8 @@ static const struct airoha_pinctrl_func_group phy2_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY2_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(1) }, .regmap_size = 2, }, @@ -1860,8 +1841,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY3_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1875,8 +1856,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY3_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1890,8 +1871,8 @@ static const struct 
airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY3_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(2) }, .regmap_size = 2, }, { @@ -1905,8 +1886,8 @@ static const struct airoha_pinctrl_func_group phy3_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY3_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(2) }, .regmap_size = 2, }, @@ -1924,8 +1905,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN1_LED_MAPPING_MASK, - LAN1_PHY4_LED_MAP + LAN0_LED_MAPPING_MASK, + LAN0_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1939,8 +1920,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN2_LED_MAPPING_MASK, - LAN2_PHY4_LED_MAP + LAN1_LED_MAPPING_MASK, + LAN1_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1954,8 +1935,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN3_LED_MAPPING_MASK, - LAN3_PHY4_LED_MAP + LAN2_LED_MAPPING_MASK, + LAN2_PHY_LED_MAP(3) }, .regmap_size = 2, }, { @@ -1969,8 +1950,8 @@ static const struct airoha_pinctrl_func_group phy4_led1_func_group[] = { .regmap[1] = { AIROHA_FUNC_MUX, REG_LAN_LED1_MAPPING, - LAN4_LED_MAPPING_MASK, - LAN4_PHY4_LED_MAP + LAN3_LED_MAPPING_MASK, + LAN3_PHY_LED_MAP(3) }, .regmap_size = 2, }, diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index d1556b75d9effd..4918d38abfc29d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -381,10 +381,13 @@ int mtk_build_eint(struct mtk_pinctrl *hw, struct platform_device *pdev) return -ENOMEM; count_reg_names = of_property_count_strings(np, "reg-names"); - 
if (count_reg_names < hw->soc->nbase_names) + if (count_reg_names < 0) + return -EINVAL; + + hw->eint->nbase = count_reg_names - (int)hw->soc->nbase_names; + if (hw->eint->nbase <= 0) return -EINVAL; - hw->eint->nbase = count_reg_names - hw->soc->nbase_names; hw->eint->base = devm_kmalloc_array(&pdev->dev, hw->eint->nbase, sizeof(*hw->eint->base), GFP_KERNEL | __GFP_ZERO); if (!hw->eint->base) { @@ -416,7 +419,7 @@ int mtk_build_eint(struct mtk_pinctrl *hw, struct platform_device *pdev) hw->eint->pctl = hw; hw->eint->gpio_xlate = &mtk_eint_xt; - ret = mtk_eint_do_init(hw->eint); + ret = mtk_eint_do_init(hw->eint, hw->soc->eint_pin); if (ret) goto err_free_eint; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 91edb539925a49..7289648eaa0259 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1015,9 +1015,15 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) if (!pctl->eint) return -ENOMEM; - pctl->eint->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(pctl->eint->base)) - return PTR_ERR(pctl->eint->base); + pctl->eint->nbase = 1; + /* mtk-eint expects an array */ + pctl->eint->base = devm_kzalloc(pctl->dev, sizeof(pctl->eint->base), GFP_KERNEL); + if (!pctl->eint->base) + return -ENOMEM; + + pctl->eint->base[0] = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pctl->eint->base[0])) + return PTR_ERR(pctl->eint->base[0]); pctl->eint->irq = irq_of_parse_and_map(np, 0); if (!pctl->eint->irq) @@ -1033,7 +1039,7 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) pctl->eint->pctl = pctl; pctl->eint->gpio_xlate = &mtk_eint_xt; - return mtk_eint_do_init(pctl->eint); + return mtk_eint_do_init(pctl->eint, NULL); } /* This is used as a common probe function */ diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 
253a0cc57e396d..e5a32a0532eeec 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -487,7 +487,7 @@ static int meson_pinconf_get(struct pinctrl_dev *pcdev, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: if (meson_pinconf_get_pull(pc, pin) == param) - arg = 1; + arg = 60000; else return -EINVAL; break; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index 335744ac831057..79f9c08e5039c3 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -417,20 +417,22 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { struct armada_37xx_pinctrl *info = gpiochip_get_data(chip); - unsigned int reg = OUTPUT_EN; + unsigned int en_offset = offset; + unsigned int reg = OUTPUT_VAL; unsigned int mask, val, ret; armada_37xx_update_reg(®, &offset); mask = BIT(offset); + val = value ? mask : 0; - ret = regmap_update_bits(info->regmap, reg, mask, mask); - + ret = regmap_update_bits(info->regmap, reg, mask, val); if (ret) return ret; - reg = OUTPUT_VAL; - val = value ? 
mask : 0; - regmap_update_bits(info->regmap, reg, mask, val); + reg = OUTPUT_EN; + armada_37xx_update_reg(®, &en_offset); + + regmap_update_bits(info->regmap, reg, mask, mask); return 0; } diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 93ab277d9943cf..fbe74e4ef320c1 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1819,12 +1819,16 @@ static int at91_gpio_probe(struct platform_device *pdev) struct at91_gpio_chip *at91_chip = NULL; struct gpio_chip *chip; struct pinctrl_gpio_range *range; + int alias_idx; int ret = 0; int irq, i; - int alias_idx = of_alias_get_id(np, "gpio"); uint32_t ngpio; char **names; + alias_idx = of_alias_get_id(np, "gpio"); + if (alias_idx < 0) + return alias_idx; + BUG_ON(alias_idx >= ARRAY_SIZE(gpio_chips)); if (gpio_chips[alias_idx]) return dev_err_probe(dev, -EBUSY, "%d slot is occupied.\n", alias_idx); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 82f0cc43bbf4f4..0eb816395dc64d 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -44,7 +44,6 @@ * @pctrl: pinctrl handle. * @chip: gpiochip handle. * @desc: pin controller descriptor - * @restart_nb: restart notifier block. * @irq: parent irq for the TLMM irq_chip. 
* @intr_target_use_scm: route irq to application cpu using scm calls * @lock: Spinlock to protect register resources as well @@ -64,7 +63,6 @@ struct msm_pinctrl { struct pinctrl_dev *pctrl; struct gpio_chip chip; struct pinctrl_desc desc; - struct notifier_block restart_nb; int irq; @@ -1471,10 +1469,9 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) return 0; } -static int msm_ps_hold_restart(struct notifier_block *nb, unsigned long action, - void *data) +static int msm_ps_hold_restart(struct sys_off_data *data) { - struct msm_pinctrl *pctrl = container_of(nb, struct msm_pinctrl, restart_nb); + struct msm_pinctrl *pctrl = data->cb_data; writel(0, pctrl->regs[0] + PS_HOLD_OFFSET); mdelay(1000); @@ -1485,7 +1482,11 @@ static struct msm_pinctrl *poweroff_pctrl; static void msm_ps_hold_poweroff(void) { - msm_ps_hold_restart(&poweroff_pctrl->restart_nb, 0, NULL); + struct sys_off_data data = { + .cb_data = poweroff_pctrl, + }; + + msm_ps_hold_restart(&data); } static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) @@ -1495,9 +1496,11 @@ static void msm_pinctrl_setup_pm_reset(struct msm_pinctrl *pctrl) for (i = 0; i < pctrl->soc->nfunctions; i++) if (!strcmp(func[i].name, "ps_hold")) { - pctrl->restart_nb.notifier_call = msm_ps_hold_restart; - pctrl->restart_nb.priority = 128; - if (register_restart_handler(&pctrl->restart_nb)) + if (devm_register_sys_off_handler(pctrl->dev, + SYS_OFF_MODE_RESTART, + 128, + msm_ps_hold_restart, + pctrl)) dev_err(pctrl->dev, "failed to setup restart handler.\n"); poweroff_pctrl = pctrl; @@ -1599,8 +1602,6 @@ void msm_pinctrl_remove(struct platform_device *pdev) struct msm_pinctrl *pctrl = platform_get_drvdata(pdev); gpiochip_remove(&pctrl->chip); - - unregister_restart_handler(&pctrl->restart_nb); } EXPORT_SYMBOL(msm_pinctrl_remove); diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index ba699eac9ee8b2..20e9bccda4cd6d 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ 
b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -165,6 +165,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(62, "GPIO_62"), PINCTRL_PIN(63, "GPIO_63"), PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), PINCTRL_PIN(69, "GPIO_69"), PINCTRL_PIN(70, "GPIO_70"), PINCTRL_PIN(71, "GPIO_71"), @@ -179,12 +183,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(80, "GPIO_80"), PINCTRL_PIN(81, "GPIO_81"), PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), PINCTRL_PIN(86, "GPIO_86"), PINCTRL_PIN(87, "GPIO_87"), PINCTRL_PIN(88, "GPIO_88"), PINCTRL_PIN(89, "GPIO_89"), PINCTRL_PIN(90, "GPIO_90"), PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), PINCTRL_PIN(94, "GPIO_94"), PINCTRL_PIN(95, "GPIO_95"), PINCTRL_PIN(96, "GPIO_96"), diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 23015b055f6a92..17ca743c2210fc 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1062,7 +1062,7 @@ static const struct msm_pinctrl_soc_data qcs615_tlmm = { .nfunctions = ARRAY_SIZE(qcs615_functions), .groups = qcs615_groups, .ngroups = ARRAY_SIZE(qcs615_groups), - .ngpios = 123, + .ngpios = 124, .tiles = qcs615_tiles, .ntiles = ARRAY_SIZE(qcs615_tiles), .wakeirq_map = qcs615_pdc_map, diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c index ba6de944a859a0..5f5f7c4ac644c4 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c @@ -1204,7 +1204,7 @@ static const struct msm_pinctrl_soc_data qcs8300_pinctrl = { .nfunctions = ARRAY_SIZE(qcs8300_functions), .groups = qcs8300_groups, .ngroups = ARRAY_SIZE(qcs8300_groups), - .ngpios = 133, + .ngpios = 134, .wakeirq_map = qcs8300_pdc_map, 
.nwakeirq_map = ARRAY_SIZE(qcs8300_pdc_map), .egpio_func = 11, diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index 1af11cd95fb0e6..b94fb4ee0ec380 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -46,7 +46,9 @@ .out_bit = 1, \ .intr_enable_bit = 0, \ .intr_status_bit = 0, \ - .intr_target_bit = 5, \ + .intr_wakeup_present_bit = 6, \ + .intr_wakeup_enable_bit = 7, \ + .intr_target_bit = 8, \ .intr_target_kpss_val = 3, \ .intr_raw_status_bit = 4, \ .intr_polarity_bit = 1, \ diff --git a/drivers/pinctrl/qcom/tlmm-test.c b/drivers/pinctrl/qcom/tlmm-test.c index fd02bf3a76cbcc..7b99e89e0f6703 100644 --- a/drivers/pinctrl/qcom/tlmm-test.c +++ b/drivers/pinctrl/qcom/tlmm-test.c @@ -547,6 +547,7 @@ static int tlmm_test_init(struct kunit *test) struct tlmm_test_priv *priv; priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); atomic_set(&priv->intr_count, 0); atomic_set(&priv->thread_count, 0); diff --git a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c index dd07720e32cc09..9fd894729a7b87 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos-arm64.c @@ -1419,8 +1419,8 @@ static const struct samsung_pin_ctrl exynosautov920_pin_ctrl[] = { .pin_banks = exynosautov920_pin_banks0, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks0), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, .retention_data = &exynosautov920_retention_data, }, { /* pin-controller instance 1 AUD data */ @@ -1431,43 +1431,43 @@ static const struct samsung_pin_ctrl exynosautov920_pin_ctrl[] = { .pin_banks = exynosautov920_pin_banks2, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks2), .eint_gpio_init = exynos_eint_gpio_init, - 
.suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 3 HSI1 data */ .pin_banks = exynosautov920_pin_banks3, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks3), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 4 HSI2 data */ .pin_banks = exynosautov920_pin_banks4, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks4), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 5 HSI2UFS data */ .pin_banks = exynosautov920_pin_banks5, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks5), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 6 PERIC0 data */ .pin_banks = exynosautov920_pin_banks6, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks6), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, { /* pin-controller instance 7 PERIC1 data */ .pin_banks = exynosautov920_pin_banks7, .nr_banks = ARRAY_SIZE(exynosautov920_pin_banks7), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = exynosautov920_pinctrl_suspend, + .resume = exynosautov920_pinctrl_resume, }, }; @@ -1762,15 +1762,15 @@ static const struct samsung_pin_ctrl gs101_pin_ctrl[] __initconst = { .pin_banks = gs101_pin_alive, .nr_banks 
= ARRAY_SIZE(gs101_pin_alive), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (FAR_ALIVE) */ .pin_banks = gs101_pin_far_alive, .nr_banks = ARRAY_SIZE(gs101_pin_far_alive), .eint_wkup_init = exynos_eint_wkup_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (GSACORE) */ .pin_banks = gs101_pin_gsacore, @@ -1784,29 +1784,29 @@ static const struct samsung_pin_ctrl gs101_pin_ctrl[] __initconst = { .pin_banks = gs101_pin_peric0, .nr_banks = ARRAY_SIZE(gs101_pin_peric0), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (PERIC1) */ .pin_banks = gs101_pin_peric1, .nr_banks = ARRAY_SIZE(gs101_pin_peric1), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (HSI1) */ .pin_banks = gs101_pin_hsi1, .nr_banks = ARRAY_SIZE(gs101_pin_hsi1), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, { /* pin banks of gs101 pin-controller (HSI2) */ .pin_banks = gs101_pin_hsi2, .nr_banks = ARRAY_SIZE(gs101_pin_hsi2), .eint_gpio_init = exynos_eint_gpio_init, - .suspend = exynos_pinctrl_suspend, - .resume = exynos_pinctrl_resume, + .suspend = gs101_pinctrl_suspend, + .resume = gs101_pinctrl_resume, }, }; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index 
42093bae8bb793..0879684338c772 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -762,153 +762,187 @@ __init int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) return 0; } -static void exynos_pinctrl_suspend_bank( - struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +static void exynos_set_wakeup(struct samsung_pin_bank *bank) { - struct exynos_eint_gpio_save *save = bank->soc_priv; - const void __iomem *regs = bank->eint_base; + struct exynos_irq_chip *irq_chip; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for saving state\n"); - return; + if (bank->irq_chip) { + irq_chip = bank->irq_chip; + irq_chip->set_eint_wakeup_mask(bank->drvdata, irq_chip); } - - save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset); - save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset); - save->eint_fltcon1 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4); - save->eint_mask = readl(regs + bank->irq_chip->eint_mask - + bank->eint_offset); - - clk_disable(bank->drvdata->pclk); - - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); - pr_debug("%s: save fltcon0 %#010x\n", bank->name, save->eint_fltcon0); - pr_debug("%s: save fltcon1 %#010x\n", bank->name, save->eint_fltcon1); - pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); } -static void exynosauto_pinctrl_suspend_bank(struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +void exynos_pinctrl_suspend(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; const void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for saving state\n"); - return; + if (bank->eint_type == EINT_TYPE_GPIO) { + save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET + + 
bank->eint_offset); + save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset); + save->eint_fltcon1 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4); + save->eint_mask = readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset); + + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save fltcon0 %#010x\n", + bank->name, save->eint_fltcon0); + pr_debug("%s: save fltcon1 %#010x\n", + bank->name, save->eint_fltcon1); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + exynos_set_wakeup(bank); } - - save->eint_con = readl(regs + bank->pctl_offset + bank->eint_con_offset); - save->eint_mask = readl(regs + bank->pctl_offset + bank->eint_mask_offset); - - clk_disable(bank->drvdata->pclk); - - pr_debug("%s: save con %#010x\n", bank->name, save->eint_con); - pr_debug("%s: save mask %#010x\n", bank->name, save->eint_mask); } -void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata) +void gs101_pinctrl_suspend(struct samsung_pin_bank *bank) { - struct samsung_pin_bank *bank = drvdata->pin_banks; - struct exynos_irq_chip *irq_chip = NULL; - int i; + struct exynos_eint_gpio_save *save = bank->soc_priv; + const void __iomem *regs = bank->eint_base; - for (i = 0; i < drvdata->nr_banks; ++i, ++bank) { - if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_suspend_bank(drvdata, bank); - else - exynos_pinctrl_suspend_bank(drvdata, bank); - } - else if (bank->eint_type == EINT_TYPE_WKUP) { - if (!irq_chip) { - irq_chip = bank->irq_chip; - irq_chip->set_eint_wakeup_mask(drvdata, - irq_chip); - } - } + if (bank->eint_type == EINT_TYPE_GPIO) { + save->eint_con = readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + + save->eint_fltcon0 = readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset); + + /* fltcon1 register only exists for pins 4-7 */ + if 
(bank->nr_pins > 4) + save->eint_fltcon1 = readl(regs + + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset + 4); + + save->eint_mask = readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset); + + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save fltcon0 %#010x\n", + bank->name, save->eint_fltcon0); + if (bank->nr_pins > 4) + pr_debug("%s: save fltcon1 %#010x\n", + bank->name, save->eint_fltcon1); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + exynos_set_wakeup(bank); } } -static void exynos_pinctrl_resume_bank( - struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; - void __iomem *regs = bank->eint_base; + const void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for restoring state\n"); - return; + if (bank->eint_type == EINT_TYPE_GPIO) { + save->eint_con = readl(regs + bank->pctl_offset + + bank->eint_con_offset); + save->eint_mask = readl(regs + bank->pctl_offset + + bank->eint_mask_offset); + pr_debug("%s: save con %#010x\n", + bank->name, save->eint_con); + pr_debug("%s: save mask %#010x\n", + bank->name, save->eint_mask); + } else if (bank->eint_type == EINT_TYPE_WKUP) { + exynos_set_wakeup(bank); } +} - pr_debug("%s: con %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset), save->eint_con); - pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset), save->eint_fltcon0); - pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, - readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4), save->eint_fltcon1); - pr_debug("%s: mask %#010x => %#010x\n", bank->name, - readl(regs + 
bank->irq_chip->eint_mask - + bank->eint_offset), save->eint_mask); - - writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET - + bank->eint_offset); - writel(save->eint_fltcon0, regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset); - writel(save->eint_fltcon1, regs + EXYNOS_GPIO_EFLTCON_OFFSET - + 2 * bank->eint_offset + 4); - writel(save->eint_mask, regs + bank->irq_chip->eint_mask - + bank->eint_offset); +void gs101_pinctrl_resume(struct samsung_pin_bank *bank) +{ + struct exynos_eint_gpio_save *save = bank->soc_priv; - clk_disable(bank->drvdata->pclk); + void __iomem *regs = bank->eint_base; + void __iomem *eint_fltcfg0 = regs + EXYNOS_GPIO_EFLTCON_OFFSET + + bank->eint_fltcon_offset; + + if (bank->eint_type == EINT_TYPE_GPIO) { + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset), save->eint_con); + + pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, + readl(eint_fltcfg0), save->eint_fltcon0); + + /* fltcon1 register only exists for pins 4-7 */ + if (bank->nr_pins > 4) + pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, + readl(eint_fltcfg0 + 4), save->eint_fltcon1); + + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset), save->eint_mask); + + writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + writel(save->eint_fltcon0, eint_fltcfg0); + + if (bank->nr_pins > 4) + writel(save->eint_fltcon1, eint_fltcfg0 + 4); + writel(save->eint_mask, regs + bank->irq_chip->eint_mask + + bank->eint_offset); + } } -static void exynosauto_pinctrl_resume_bank(struct samsung_pinctrl_drv_data *drvdata, - struct samsung_pin_bank *bank) +void exynos_pinctrl_resume(struct samsung_pin_bank *bank) { struct exynos_eint_gpio_save *save = bank->soc_priv; void __iomem *regs = bank->eint_base; - if (clk_enable(bank->drvdata->pclk)) { - dev_err(bank->gpio_chip.parent, - "unable to enable clock for restoring state\n"); - 
return; + if (bank->eint_type == EINT_TYPE_GPIO) { + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset), save->eint_con); + pr_debug("%s: fltcon0 %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset), save->eint_fltcon0); + pr_debug("%s: fltcon1 %#010x => %#010x\n", bank->name, + readl(regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4), + save->eint_fltcon1); + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->irq_chip->eint_mask + + bank->eint_offset), save->eint_mask); + + writel(save->eint_con, regs + EXYNOS_GPIO_ECON_OFFSET + + bank->eint_offset); + writel(save->eint_fltcon0, regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset); + writel(save->eint_fltcon1, regs + EXYNOS_GPIO_EFLTCON_OFFSET + + 2 * bank->eint_offset + 4); + writel(save->eint_mask, regs + bank->irq_chip->eint_mask + + bank->eint_offset); } - - pr_debug("%s: con %#010x => %#010x\n", bank->name, - readl(regs + bank->pctl_offset + bank->eint_con_offset), save->eint_con); - pr_debug("%s: mask %#010x => %#010x\n", bank->name, - readl(regs + bank->pctl_offset + bank->eint_mask_offset), save->eint_mask); - - writel(save->eint_con, regs + bank->pctl_offset + bank->eint_con_offset); - writel(save->eint_mask, regs + bank->pctl_offset + bank->eint_mask_offset); - - clk_disable(bank->drvdata->pclk); } -void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata) +void exynosautov920_pinctrl_resume(struct samsung_pin_bank *bank) { - struct samsung_pin_bank *bank = drvdata->pin_banks; - int i; + struct exynos_eint_gpio_save *save = bank->soc_priv; + void __iomem *regs = bank->eint_base; - for (i = 0; i < drvdata->nr_banks; ++i, ++bank) - if (bank->eint_type == EINT_TYPE_GPIO) { - if (bank->eint_con_offset) - exynosauto_pinctrl_resume_bank(drvdata, bank); - else - exynos_pinctrl_resume_bank(drvdata, bank); - } + if (bank->eint_type == EINT_TYPE_GPIO) 
{ + /* exynosautov920 has eint_con_offset for all but one bank */ + if (!bank->eint_con_offset) + exynos_pinctrl_resume(bank); + + pr_debug("%s: con %#010x => %#010x\n", bank->name, + readl(regs + bank->pctl_offset + bank->eint_con_offset), + save->eint_con); + pr_debug("%s: mask %#010x => %#010x\n", bank->name, + readl(regs + bank->pctl_offset + + bank->eint_mask_offset), save->eint_mask); + + writel(save->eint_con, + regs + bank->pctl_offset + bank->eint_con_offset); + writel(save->eint_mask, + regs + bank->pctl_offset + bank->eint_mask_offset); + } } static void exynos_retention_enable(struct samsung_pinctrl_drv_data *drvdata) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index b483270ddc53c0..2bee52b61b9317 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -240,8 +240,12 @@ struct exynos_muxed_weint_data { int exynos_eint_gpio_init(struct samsung_pinctrl_drv_data *d); int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d); -void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata); -void exynos_pinctrl_resume(struct samsung_pinctrl_drv_data *drvdata); +void exynosautov920_pinctrl_resume(struct samsung_pin_bank *bank); +void exynosautov920_pinctrl_suspend(struct samsung_pin_bank *bank); +void exynos_pinctrl_suspend(struct samsung_pin_bank *bank); +void exynos_pinctrl_resume(struct samsung_pin_bank *bank); +void gs101_pinctrl_suspend(struct samsung_pin_bank *bank); +void gs101_pinctrl_resume(struct samsung_pin_bank *bank); struct samsung_retention_ctrl * exynos_retention_init(struct samsung_pinctrl_drv_data *drvdata, const struct samsung_retention_data *data); diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 2896eb2de2c098..ef557217e173af 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -1333,6 +1333,7 @@ static int 
samsung_pinctrl_probe(struct platform_device *pdev) static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) { struct samsung_pinctrl_drv_data *drvdata = dev_get_drvdata(dev); + struct samsung_pin_bank *bank; int i; i = clk_enable(drvdata->pclk); @@ -1343,7 +1344,7 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) } for (i = 0; i < drvdata->nr_banks; i++) { - struct samsung_pin_bank *bank = &drvdata->pin_banks[i]; + bank = &drvdata->pin_banks[i]; const void __iomem *reg = bank->pctl_base + bank->pctl_offset; const u8 *offs = bank->type->reg_offset; const u8 *widths = bank->type->fld_width; @@ -1371,10 +1372,14 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) } } + for (i = 0; i < drvdata->nr_banks; i++) { + bank = &drvdata->pin_banks[i]; + if (drvdata->suspend) + drvdata->suspend(bank); + } + clk_disable(drvdata->pclk); - if (drvdata->suspend) - drvdata->suspend(drvdata); if (drvdata->retention_ctrl && drvdata->retention_ctrl->enable) drvdata->retention_ctrl->enable(drvdata); @@ -1392,6 +1397,7 @@ static int __maybe_unused samsung_pinctrl_suspend(struct device *dev) static int __maybe_unused samsung_pinctrl_resume(struct device *dev) { struct samsung_pinctrl_drv_data *drvdata = dev_get_drvdata(dev); + struct samsung_pin_bank *bank; int ret; int i; @@ -1406,11 +1412,14 @@ static int __maybe_unused samsung_pinctrl_resume(struct device *dev) return ret; } - if (drvdata->resume) - drvdata->resume(drvdata); + for (i = 0; i < drvdata->nr_banks; i++) { + bank = &drvdata->pin_banks[i]; + if (drvdata->resume) + drvdata->resume(bank); + } for (i = 0; i < drvdata->nr_banks; i++) { - struct samsung_pin_bank *bank = &drvdata->pin_banks[i]; + bank = &drvdata->pin_banks[i]; void __iomem *reg = bank->pctl_base + bank->pctl_offset; const u8 *offs = bank->type->reg_offset; const u8 *widths = bank->type->fld_width; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h index 
3cf758df7d6912..fcc57c244d167d 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.h +++ b/drivers/pinctrl/samsung/pinctrl-samsung.h @@ -285,8 +285,8 @@ struct samsung_pin_ctrl { int (*eint_gpio_init)(struct samsung_pinctrl_drv_data *); int (*eint_wkup_init)(struct samsung_pinctrl_drv_data *); void (*pud_value_init)(struct samsung_pinctrl_drv_data *drvdata); - void (*suspend)(struct samsung_pinctrl_drv_data *); - void (*resume)(struct samsung_pinctrl_drv_data *); + void (*suspend)(struct samsung_pin_bank *bank); + void (*resume)(struct samsung_pin_bank *bank); }; /** @@ -335,8 +335,8 @@ struct samsung_pinctrl_drv_data { struct samsung_retention_ctrl *retention_ctrl; - void (*suspend)(struct samsung_pinctrl_drv_data *); - void (*resume)(struct samsung_pinctrl_drv_data *); + void (*suspend)(struct samsung_pin_bank *bank); + void (*resume)(struct samsung_pin_bank *bank); }; /** diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c index 1833078f68776c..4e34b0cd3b73aa 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c @@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev, */ static int prepare_function_table(struct device *dev, struct device_node *pnode, struct sunxi_desc_pin *pins, int npins, - const u8 *irq_bank_muxes) + unsigned pin_base, const u8 *irq_bank_muxes) { struct device_node *node; struct property *prop; @@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, */ for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; if (irq_bank_muxes[bank]) { pin->variant++; @@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, last_bank = 0; for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / 
PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; int lastfunc = pin->variant + 1; int irq_mux = irq_bank_muxes[bank]; @@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev, return PTR_ERR(pins); ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins, - irq_bank_muxes); + desc->pin_base, irq_bank_muxes); if (ret) return ret; diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c index d2228720991ffe..7678e3d05fd36f 100644 --- a/drivers/platform/chrome/cros_ec_typec.c +++ b/drivers/platform/chrome/cros_ec_typec.c @@ -22,8 +22,10 @@ #define DRV_NAME "cros-ec-typec" -#define DP_PORT_VDO (DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | BIT(DP_PIN_ASSIGN_D)) | \ - DP_CAP_DFP_D | DP_CAP_RECEPTACLE) +#define DP_PORT_VDO (DP_CAP_DFP_D | DP_CAP_RECEPTACLE | \ + DP_CONF_SET_PIN_ASSIGN(BIT(DP_PIN_ASSIGN_C) | \ + BIT(DP_PIN_ASSIGN_D) | \ + BIT(DP_PIN_ASSIGN_E))) static void cros_typec_role_switch_quirk(struct fwnode_handle *fwnode) { diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c index b95dcb8d483ce4..c18a5b96de5ce7 100644 --- a/drivers/platform/mellanox/mlxbf-bootctl.c +++ b/drivers/platform/mellanox/mlxbf-bootctl.c @@ -333,9 +333,9 @@ static ssize_t secure_boot_fuse_state_show(struct device *dev, else status = valid ? 
"Invalid" : "Free"; } - buf_len += sysfs_emit(buf + buf_len, "%d:%s ", key, status); + buf_len += sysfs_emit_at(buf, buf_len, "%d:%s ", key, status); } - buf_len += sysfs_emit(buf + buf_len, "\n"); + buf_len += sysfs_emit_at(buf, buf_len, "\n"); return buf_len; } diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index c1eccb3c80c5c9..eaae044e4f824c 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -27,9 +27,8 @@ #include "hsmp.h" -#define DRIVER_NAME "amd_hsmp" +#define DRIVER_NAME "hsmp_acpi" #define DRIVER_VERSION "2.3" -#define ACPI_HSMP_DEVICE_HID "AMDI0097" /* These are the strings specified in ACPI table */ #define MSG_IDOFF_STR "MsgIdOffset" diff --git a/drivers/platform/x86/amd/hsmp/hsmp.h b/drivers/platform/x86/amd/hsmp/hsmp.h index af8b21f821d668..d58d4f0c20d552 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.h +++ b/drivers/platform/x86/amd/hsmp/hsmp.h @@ -23,6 +23,7 @@ #define HSMP_CDEV_NAME "hsmp_cdev" #define HSMP_DEVNODE_NAME "hsmp" +#define ACPI_HSMP_DEVICE_HID "AMDI0097" struct hsmp_mbaddr_info { u32 base_addr; diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c index b9782a078dbd2f..81931e808bbc81 100644 --- a/drivers/platform/x86/amd/hsmp/plat.c +++ b/drivers/platform/x86/amd/hsmp/plat.c @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -266,7 +267,7 @@ static bool legacy_hsmp_support(void) } case 0x1A: switch (boot_cpu_data.x86_model) { - case 0x00 ... 0x1F: + case 0x00 ... 
0x0F: return true; default: return false; @@ -288,6 +289,9 @@ static int __init hsmp_plt_init(void) return ret; } + if (acpi_dev_present(ACPI_HSMP_DEVICE_HID, NULL, -1)) + return -ENODEV; + hsmp_pdev = get_hsmp_pdev(); if (!hsmp_pdev) return -ENOMEM; diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index b4f49720c87f62..2e3f6fc67c568d 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -217,6 +217,13 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BIOS_VERSION, "03.05"), } }, + { + .ident = "MECHREVO Wujie 14X (GX4HRXL)", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"), + } + }, {} }; diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index d789d6cab79486..0329fafe14ebcd 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -644,10 +644,9 @@ static void amd_pmc_s2idle_check(void) struct smu_metrics table; int rc; - /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ - if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && - table.s0i3_last_entry_status) - usleep_range(10000, 20000); + /* Avoid triggering OVP */ + if (!get_metrics_table(pdev, &table) && table.s0i3_last_entry_status) + msleep(2500); /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); diff --git a/drivers/platform/x86/amd/pmf/auto-mode.c b/drivers/platform/x86/amd/pmf/auto-mode.c index 02ff68be10d012..a184922bba8d65 100644 --- a/drivers/platform/x86/amd/pmf/auto-mode.c +++ b/drivers/platform/x86/amd/pmf/auto-mode.c @@ -120,9 +120,9 @@ static void amd_pmf_set_automode(struct amd_pmf_dev *dev, int idx, amd_pmf_send_cmd(dev, SET_SPPT_APU_ONLY, false, pwr_ctrl->sppt_apu_only, NULL); amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, pwr_ctrl->stt_min, NULL); amd_pmf_send_cmd(dev, 
SET_STT_LIMIT_APU, false, - pwr_ctrl->stt_skin_temp[STT_TEMP_APU], NULL); + fixp_q88_fromint(pwr_ctrl->stt_skin_temp[STT_TEMP_APU]), NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - pwr_ctrl->stt_skin_temp[STT_TEMP_HS2], NULL); + fixp_q88_fromint(pwr_ctrl->stt_skin_temp[STT_TEMP_HS2]), NULL); if (is_apmf_func_supported(dev, APMF_FUNC_SET_FAN_IDX)) apmf_update_fan_idx(dev, config_store.mode_set[idx].fan_control.manual, diff --git a/drivers/platform/x86/amd/pmf/cnqf.c b/drivers/platform/x86/amd/pmf/cnqf.c index bc8899e15c914b..207a0b33d8d368 100644 --- a/drivers/platform/x86/amd/pmf/cnqf.c +++ b/drivers/platform/x86/amd/pmf/cnqf.c @@ -81,10 +81,10 @@ static int amd_pmf_set_cnqf(struct amd_pmf_dev *dev, int src, int idx, amd_pmf_send_cmd(dev, SET_SPPT, false, pc->sppt, NULL); amd_pmf_send_cmd(dev, SET_SPPT_APU_ONLY, false, pc->sppt_apu_only, NULL); amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, pc->stt_min, NULL); - amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, pc->stt_skin_temp[STT_TEMP_APU], - NULL); - amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, pc->stt_skin_temp[STT_TEMP_HS2], - NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, + fixp_q88_fromint(pc->stt_skin_temp[STT_TEMP_APU]), NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, + fixp_q88_fromint(pc->stt_skin_temp[STT_TEMP_HS2]), NULL); if (is_apmf_func_supported(dev, APMF_FUNC_SET_FAN_IDX)) apmf_update_fan_idx(dev, diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index a2cb2d5544f5b3..96821101ec773c 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -176,6 +176,20 @@ static void __maybe_unused amd_pmf_dump_registers(struct amd_pmf_dev *dev) dev_dbg(dev->dev, "AMD_PMF_REGISTER_MESSAGE:%x\n", value); } +/** + * fixp_q88_fromint: Convert integer to Q8.8 + * @val: input value + * + * Converts an integer into binary fixed point format where 8 bits + * are used for integer and 8 bits are used for the decimal. 
+ * + * Return: unsigned integer converted to Q8.8 format + */ +u32 fixp_q88_fromint(u32 val) +{ + return val << 8; +} + int amd_pmf_send_cmd(struct amd_pmf_dev *dev, u8 message, bool get, u32 arg, u32 *data) { int rc; diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index e6bdee68ccf347..45b60238d5277f 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -777,6 +777,7 @@ int apmf_install_handler(struct amd_pmf_dev *pmf_dev); int apmf_os_power_slider_update(struct amd_pmf_dev *dev, u8 flag); int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer); int amd_pmf_notify_sbios_heartbeat_event_v2(struct amd_pmf_dev *dev, u8 flag); +u32 fixp_q88_fromint(u32 val); /* SPS Layer */ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf); diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index d3083383f11fbf..49e14ca94a9e77 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -198,9 +198,11 @@ static void amd_pmf_update_slider_v2(struct amd_pmf_dev *dev, int idx) amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, apts_config_store.val[idx].stt_min_limit, NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, - apts_config_store.val[idx].stt_skin_temp_limit_apu, NULL); + fixp_q88_fromint(apts_config_store.val[idx].stt_skin_temp_limit_apu), + NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - apts_config_store.val[idx].stt_skin_temp_limit_hs2, NULL); + fixp_q88_fromint(apts_config_store.val[idx].stt_skin_temp_limit_hs2), + NULL); } void amd_pmf_update_slider(struct amd_pmf_dev *dev, bool op, int idx, @@ -217,9 +219,11 @@ void amd_pmf_update_slider(struct amd_pmf_dev *dev, bool op, int idx, amd_pmf_send_cmd(dev, SET_STT_MIN_LIMIT, false, config_store.prop[src][idx].stt_min, NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, - config_store.prop[src][idx].stt_skin_temp[STT_TEMP_APU], NULL); + 
fixp_q88_fromint(config_store.prop[src][idx].stt_skin_temp[STT_TEMP_APU]), + NULL); amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, - config_store.prop[src][idx].stt_skin_temp[STT_TEMP_HS2], NULL); + fixp_q88_fromint(config_store.prop[src][idx].stt_skin_temp[STT_TEMP_HS2]), + NULL); } else if (op == SLIDER_OP_GET) { amd_pmf_send_cmd(dev, GET_SPL, true, ARG_NONE, &table->prop[src][idx].spl); amd_pmf_send_cmd(dev, GET_FPPT, true, ARG_NONE, &table->prop[src][idx].fppt); diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index a1e43873a07b08..d3bd12ad036ae0 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -123,7 +123,8 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_ case PMF_POLICY_STT_SKINTEMP_APU: if (dev->prev_data->stt_skintemp_apu != val) { - amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, val, NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, + fixp_q88_fromint(val), NULL); dev_dbg(dev->dev, "update STT_SKINTEMP_APU: %u\n", val); dev->prev_data->stt_skintemp_apu = val; } @@ -131,7 +132,8 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_ case PMF_POLICY_STT_SKINTEMP_HS2: if (dev->prev_data->stt_skintemp_hs2 != val) { - amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, val, NULL); + amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, + fixp_q88_fromint(val), NULL); dev_dbg(dev->dev, "update STT_SKINTEMP_HS2: %u\n", val); dev->prev_data->stt_skintemp_hs2 = val; } @@ -332,6 +334,11 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) return 0; } +static inline bool amd_pmf_pb_valid(struct amd_pmf_dev *dev) +{ + return memchr_inv(dev->policy_buf, 0xff, dev->policy_sz); +} + #ifdef CONFIG_AMD_PMF_DEBUG static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) { @@ -359,12 +366,22 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, dev->policy_buf = new_policy_buf; 
dev->policy_sz = length; + if (!amd_pmf_pb_valid(dev)) { + ret = -EINVAL; + goto cleanup; + } + amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) - return ret; + goto cleanup; return length; + +cleanup: + kfree(dev->policy_buf); + dev->policy_buf = NULL; + return ret; } static const struct file_operations pb_fops = { @@ -526,6 +543,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); + if (!amd_pmf_pb_valid(dev)) { + dev_info(dev->dev, "No Smart PC policy present\n"); + ret = -EINVAL; + goto err_free_policy; + } + amd_pmf_hex_dump_pb(dev); dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index d460dd194f1965..a0a411b4f2d6d8 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -426,11 +426,14 @@ static int asus_pega_lucid_set(struct asus_laptop *asus, int unit, bool enable) static int pega_acc_axis(struct asus_laptop *asus, int curr, char *method) { + unsigned long long val = (unsigned long long)curr; + acpi_status status; int i, delta; - unsigned long long val; - for (i = 0; i < PEGA_ACC_RETRIES; i++) { - acpi_evaluate_integer(asus->handle, method, NULL, &val); + for (i = 0; i < PEGA_ACC_RETRIES; i++) { + status = acpi_evaluate_integer(asus->handle, method, NULL, &val); + if (ACPI_FAILURE(status)) + continue; /* The output is noisy. 
From reading the ASL * dissassembly, timeout errors are returned with 1's * in the high word, and the lack of locking around diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 38ef778e8c19b9..47cc766624d7bb 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -304,6 +304,7 @@ struct asus_wmi { u32 kbd_rgb_dev; bool kbd_rgb_state_available; + bool oobe_state_available; u8 throttle_thermal_policy_mode; u32 throttle_thermal_policy_dev; @@ -1826,7 +1827,7 @@ static int asus_wmi_led_init(struct asus_wmi *asus) goto error; } - if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE)) { + if (asus->oobe_state_available) { /* * Disable OOBE state, so that e.g. the keyboard backlight * works. @@ -4723,6 +4724,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->egpu_enable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_EGPU); asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); + asus->oobe_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_OOBE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) && dmi_check_system(asus_ally_mcu_quirk); @@ -4777,7 +4779,8 @@ static int asus_wmi_add(struct platform_device *pdev) goto fail_leds; asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); - if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) + if ((result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) == + (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) asus->driver->wlan_ctrl_by_user = 1; if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) { @@ -4970,6 +4973,13 @@ static int asus_hotk_restore(struct device *device) } if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) kbd_led_update(asus); + if (asus->oobe_state_available) { + /* + * Disable OOBE state, so that e.g. the keyboard backlight + * works. 
+ */ + asus_wmi_set_devstate(ASUS_WMI_DEVID_OOBE, 1, NULL); + } if (asus_wmi_has_fnlock_key(asus)) asus_wmi_fnlock_update(asus); diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 3d3014b5adf046..08b82c151e0710 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -61,12 +61,44 @@ static struct awcc_quirks generic_quirks = { static struct awcc_quirks empty_quirks; static const struct dmi_system_id awcc_dmi_table[] __initconst = { + { + .ident = "Alienware Area-51m R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware Area-51m R2"), + }, + .driver_data = &generic_quirks, + }, + { + .ident = "Alienware m15 R7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m15 R7"), + }, + .driver_data = &generic_quirks, + }, + { + .ident = "Alienware m16 R1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1"), + }, + .driver_data = &g_series_quirks, + }, { .ident = "Alienware m16 R1 AMD", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), }, + .driver_data = &g_series_quirks, + }, + { + .ident = "Alienware m16 R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R2"), + }, .driver_data = &generic_quirks, }, { @@ -93,6 +125,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &generic_quirks, }, + { + .ident = "Alienware x15 R2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware x15 R2"), + }, + .driver_data = &generic_quirks, + }, { .ident = "Alienware x17 R2", .matches = { @@ -125,6 +165,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = 
"Dell Inc. G16 7630", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G16 7630"), + }, + .driver_data = &g_series_quirks, + }, { .ident = "Dell Inc. G3 3500", .matches = { @@ -149,6 +197,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = "Dell Inc. G5 5505", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "G5 5505"), + }, + .driver_data = &g_series_quirks, + }, }; enum WMAX_THERMAL_INFORMATION_OPERATIONS { @@ -607,12 +663,10 @@ static int thermal_profile_probe(void *drvdata, unsigned long *choices) for (u32 i = 0; i < sys_desc[3]; i++) { ret = wmax_thermal_information(priv->wdev, WMAX_OPERATION_LIST_IDS, i + first_mode, &out_data); - - if (ret == -EIO) - return ret; - if (ret == -EBADRQC) break; + if (ret) + return ret; if (!is_wmax_thermal_code(out_data)) continue; diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c index 230e6ee966366a..d8f1bf5e58a0f4 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c @@ -45,7 +45,7 @@ static ssize_t current_password_store(struct kobject *kobj, int length; length = strlen(buf); - if (buf[length-1] == '\n') + if (length && buf[length - 1] == '\n') length--; /* firmware does verifiation of min/max password length, diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a0eae24ca9e608..162809140f68a2 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -17,13 +17,13 @@ /* * fujitsu-laptop.c - Fujitsu laptop support, providing access to additional * features made available on a range of Fujitsu laptops including the - * P2xxx/P5xxx/S6xxx/S7xxx series. + * P2xxx/P5xxx/S2xxx/S6xxx/S7xxx series. 
* * This driver implements a vendor-specific backlight control interface for * Fujitsu laptops and provides support for hotkeys present on certain Fujitsu * laptops. * - * This driver has been tested on a Fujitsu Lifebook S6410, S7020 and + * This driver has been tested on a Fujitsu Lifebook S2110, S6410, S7020 and * P8010. It should work on most P-series and S-series Lifebooks, but * YMMV. * @@ -107,7 +107,11 @@ #define KEY2_CODE 0x411 #define KEY3_CODE 0x412 #define KEY4_CODE 0x413 -#define KEY5_CODE 0x420 +#define KEY5_CODE 0x414 +#define KEY6_CODE 0x415 +#define KEY7_CODE 0x416 +#define KEY8_CODE 0x417 +#define KEY9_CODE 0x420 /* Hotkey ringbuffer limits */ #define MAX_HOTKEY_RINGBUFFER_SIZE 100 @@ -560,7 +564,7 @@ static const struct key_entry keymap_default[] = { { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, { KE_KEY, KEY3_CODE, { KEY_PROG3 } }, { KE_KEY, KEY4_CODE, { KEY_PROG4 } }, - { KE_KEY, KEY5_CODE, { KEY_RFKILL } }, + { KE_KEY, KEY9_CODE, { KEY_RFKILL } }, /* Soft keys read from status flags */ { KE_KEY, FLAG_RFKILL, { KEY_RFKILL } }, { KE_KEY, FLAG_TOUCHPAD_TOGGLE, { KEY_TOUCHPAD_TOGGLE } }, @@ -584,6 +588,18 @@ static const struct key_entry keymap_p8010[] = { { KE_END, 0 } }; +static const struct key_entry keymap_s2110[] = { + { KE_KEY, KEY1_CODE, { KEY_PROG1 } }, /* "A" */ + { KE_KEY, KEY2_CODE, { KEY_PROG2 } }, /* "B" */ + { KE_KEY, KEY3_CODE, { KEY_WWW } }, /* "Internet" */ + { KE_KEY, KEY4_CODE, { KEY_EMAIL } }, /* "E-mail" */ + { KE_KEY, KEY5_CODE, { KEY_STOPCD } }, + { KE_KEY, KEY6_CODE, { KEY_PLAYPAUSE } }, + { KE_KEY, KEY7_CODE, { KEY_PREVIOUSSONG } }, + { KE_KEY, KEY8_CODE, { KEY_NEXTSONG } }, + { KE_END, 0 } +}; + static const struct key_entry *keymap = keymap_default; static int fujitsu_laptop_dmi_keymap_override(const struct dmi_system_id *id) @@ -621,6 +637,15 @@ static const struct dmi_system_id fujitsu_laptop_dmi_table[] = { }, .driver_data = (void *)keymap_p8010 }, + { + .callback = fujitsu_laptop_dmi_keymap_override, + .ident = "Fujitsu 
LifeBook S2110", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK S2110"), + }, + .driver_data = (void *)keymap_s2110 + }, {} }; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 17a09b7784edb3..ede483573fe0dd 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1294,6 +1294,16 @@ static const struct key_entry ideapad_keymap[] = { /* Specific to some newer models */ { KE_KEY, 0x3e | IDEAPAD_WMI_KEY, { KEY_MICMUTE } }, { KE_KEY, 0x3f | IDEAPAD_WMI_KEY, { KEY_RFKILL } }, + /* Star- (User Assignable Key) */ + { KE_KEY, 0x44 | IDEAPAD_WMI_KEY, { KEY_PROG1 } }, + /* Eye */ + { KE_KEY, 0x45 | IDEAPAD_WMI_KEY, { KEY_PROG3 } }, + /* Performance toggle also Fn+Q, handled inside ideapad_wmi_notify() */ + { KE_KEY, 0x3d | IDEAPAD_WMI_KEY, { KEY_PROG4 } }, + /* shift + prtsc */ + { KE_KEY, 0x2d | IDEAPAD_WMI_KEY, { KEY_CUT } }, + { KE_KEY, 0x29 | IDEAPAD_WMI_KEY, { KEY_TOUCHPAD_TOGGLE } }, + { KE_KEY, 0x2a | IDEAPAD_WMI_KEY, { KEY_ROOT_MENU } }, { KE_END }, }; @@ -2080,6 +2090,12 @@ static void ideapad_wmi_notify(struct wmi_device *wdev, union acpi_object *data) dev_dbg(&wdev->dev, "WMI fn-key event: 0x%llx\n", data->integer.value); + /* performance button triggered by 0x3d */ + if (data->integer.value == 0x3d && priv->dytc) { + platform_profile_cycle(); + break; + } + /* 0x02 FnLock, 0x03 Esc */ if (data->integer.value == 0x02 || data->integer.value == 0x03) ideapad_fn_lock_led_notify(priv, data->integer.value == 0x02); diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 88a1a9ff2f3443..0b5e43444ed603 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -44,16 +44,17 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alex Hung"); static const struct acpi_device_id intel_hid_ids[] = { - {"INT33D5", 0}, - {"INTC1051", 0}, - {"INTC1054", 0}, - {"INTC1070", 0}, - {"INTC1076", 
0}, - {"INTC1077", 0}, - {"INTC1078", 0}, - {"INTC107B", 0}, - {"INTC10CB", 0}, - {"", 0}, + { "INT33D5" }, + { "INTC1051" }, + { "INTC1054" }, + { "INTC1070" }, + { "INTC1076" }, + { "INTC1077" }, + { "INTC1078" }, + { "INTC107B" }, + { "INTC10CB" }, + { "INTC10CC" }, + { } }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/intel/pmc/arl.c b/drivers/platform/x86/intel/pmc/arl.c index 320993bd6d31de..f9c48738b853b4 100644 --- a/drivers/platform/x86/intel/pmc/arl.c +++ b/drivers/platform/x86/intel/pmc/arl.c @@ -681,6 +681,7 @@ static struct pmc_info arl_pmc_info_list[] = { #define ARL_NPU_PCI_DEV 0xad1d #define ARL_GNA_PCI_DEV 0xae4c +#define ARL_H_NPU_PCI_DEV 0x7d1d #define ARL_H_GNA_PCI_DEV 0x774c /* * Set power state of select devices that do not have drivers to D3 @@ -694,7 +695,7 @@ static void arl_d3_fixup(void) static void arl_h_d3_fixup(void) { - pmc_core_set_device_d3(ARL_NPU_PCI_DEV); + pmc_core_set_device_d3(ARL_H_NPU_PCI_DEV); pmc_core_set_device_d3(ARL_H_GNA_PCI_DEV); } diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index 40bbf8e45fa4bb..bdee5d00f30b80 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -146,15 +146,13 @@ static int uncore_event_cpu_online(unsigned int cpu) { struct uncore_data *data; int target; + int ret; /* Check if there is an online cpu in the package for uncore MSR */ target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; - /* Use this CPU on this die as a control CPU */ - cpumask_set_cpu(cpu, &uncore_cpu_mask); - data = uncore_get_instance(cpu); if (!data) return 0; @@ -163,7 +161,14 @@ static int uncore_event_cpu_online(unsigned int cpu) data->die_id = topology_die_id(cpu); data->domain_id = UNCORE_DOMAIN_ID_INVALID; - return uncore_freq_add_entry(data, cpu); + ret 
= uncore_freq_add_entry(data, cpu); + if (ret) + return ret; + + /* Use this CPU on this die as a control CPU */ + cpumask_set_cpu(cpu, &uncore_cpu_mask); + + return 0; } static int uncore_event_cpu_offline(unsigned int cpu) diff --git a/drivers/platform/x86/msi-wmi-platform.c b/drivers/platform/x86/msi-wmi-platform.c index 9b5c7f8c79b0dd..dc5e9878cb6822 100644 --- a/drivers/platform/x86/msi-wmi-platform.c +++ b/drivers/platform/x86/msi-wmi-platform.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -76,8 +78,13 @@ enum msi_wmi_platform_method { MSI_PLATFORM_GET_WMI = 0x1d, }; -struct msi_wmi_platform_debugfs_data { +struct msi_wmi_platform_data { struct wmi_device *wdev; + struct mutex wmi_lock; /* Necessary when calling WMI methods */ +}; + +struct msi_wmi_platform_debugfs_data { + struct msi_wmi_platform_data *data; enum msi_wmi_platform_method method; struct rw_semaphore buffer_lock; /* Protects debugfs buffer */ size_t length; @@ -132,8 +139,9 @@ static int msi_wmi_platform_parse_buffer(union acpi_object *obj, u8 *output, siz return 0; } -static int msi_wmi_platform_query(struct wmi_device *wdev, enum msi_wmi_platform_method method, - u8 *input, size_t input_length, u8 *output, size_t output_length) +static int msi_wmi_platform_query(struct msi_wmi_platform_data *data, + enum msi_wmi_platform_method method, u8 *input, + size_t input_length, u8 *output, size_t output_length) { struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_buffer in = { @@ -147,9 +155,15 @@ static int msi_wmi_platform_query(struct wmi_device *wdev, enum msi_wmi_platform if (!input_length || !output_length) return -EINVAL; - status = wmidev_evaluate_method(wdev, 0x0, method, &in, &out); - if (ACPI_FAILURE(status)) - return -EIO; + /* + * The ACPI control method responsible for handling the WMI method calls + * is not thread-safe. 
Because of this we have to do the locking ourself. + */ + scoped_guard(mutex, &data->wmi_lock) { + status = wmidev_evaluate_method(data->wdev, 0x0, method, &in, &out); + if (ACPI_FAILURE(status)) + return -EIO; + } obj = out.pointer; if (!obj) @@ -170,22 +184,22 @@ static umode_t msi_wmi_platform_is_visible(const void *drvdata, enum hwmon_senso static int msi_wmi_platform_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct wmi_device *wdev = dev_get_drvdata(dev); + struct msi_wmi_platform_data *data = dev_get_drvdata(dev); u8 input[32] = { 0 }; u8 output[32]; - u16 data; + u16 value; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_FAN, input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_FAN, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; - data = get_unaligned_be16(&output[channel * 2 + 1]); - if (!data) + value = get_unaligned_be16(&output[channel * 2 + 1]); + if (!value) *val = 0; else - *val = 480000 / data; + *val = 480000 / value; return 0; } @@ -231,7 +245,7 @@ static ssize_t msi_wmi_platform_write(struct file *fp, const char __user *input, return ret; down_write(&data->buffer_lock); - ret = msi_wmi_platform_query(data->wdev, data->method, payload, data->length, data->buffer, + ret = msi_wmi_platform_query(data->data, data->method, payload, data->length, data->buffer, data->length); up_write(&data->buffer_lock); @@ -277,17 +291,17 @@ static void msi_wmi_platform_debugfs_remove(void *data) debugfs_remove_recursive(dir); } -static void msi_wmi_platform_debugfs_add(struct wmi_device *wdev, struct dentry *dir, +static void msi_wmi_platform_debugfs_add(struct msi_wmi_platform_data *drvdata, struct dentry *dir, const char *name, enum msi_wmi_platform_method method) { struct msi_wmi_platform_debugfs_data *data; struct dentry *entry; - data = devm_kzalloc(&wdev->dev, sizeof(*data), GFP_KERNEL); + data = devm_kzalloc(&drvdata->wdev->dev, 
sizeof(*data), GFP_KERNEL); if (!data) return; - data->wdev = wdev; + data->data = drvdata; data->method = method; init_rwsem(&data->buffer_lock); @@ -298,82 +312,82 @@ static void msi_wmi_platform_debugfs_add(struct wmi_device *wdev, struct dentry entry = debugfs_create_file(name, 0600, dir, data, &msi_wmi_platform_debugfs_fops); if (IS_ERR(entry)) - devm_kfree(&wdev->dev, data); + devm_kfree(&drvdata->wdev->dev, data); } -static void msi_wmi_platform_debugfs_init(struct wmi_device *wdev) +static void msi_wmi_platform_debugfs_init(struct msi_wmi_platform_data *data) { struct dentry *dir; char dir_name[64]; int ret, method; - scnprintf(dir_name, ARRAY_SIZE(dir_name), "%s-%s", DRIVER_NAME, dev_name(&wdev->dev)); + scnprintf(dir_name, ARRAY_SIZE(dir_name), "%s-%s", DRIVER_NAME, dev_name(&data->wdev->dev)); dir = debugfs_create_dir(dir_name, NULL); if (IS_ERR(dir)) return; - ret = devm_add_action_or_reset(&wdev->dev, msi_wmi_platform_debugfs_remove, dir); + ret = devm_add_action_or_reset(&data->wdev->dev, msi_wmi_platform_debugfs_remove, dir); if (ret < 0) return; for (method = MSI_PLATFORM_GET_PACKAGE; method <= MSI_PLATFORM_GET_WMI; method++) - msi_wmi_platform_debugfs_add(wdev, dir, msi_wmi_platform_debugfs_names[method - 1], + msi_wmi_platform_debugfs_add(data, dir, msi_wmi_platform_debugfs_names[method - 1], method); } -static int msi_wmi_platform_hwmon_init(struct wmi_device *wdev) +static int msi_wmi_platform_hwmon_init(struct msi_wmi_platform_data *data) { struct device *hdev; - hdev = devm_hwmon_device_register_with_info(&wdev->dev, "msi_wmi_platform", wdev, + hdev = devm_hwmon_device_register_with_info(&data->wdev->dev, "msi_wmi_platform", data, &msi_wmi_platform_chip_info, NULL); return PTR_ERR_OR_ZERO(hdev); } -static int msi_wmi_platform_ec_init(struct wmi_device *wdev) +static int msi_wmi_platform_ec_init(struct msi_wmi_platform_data *data) { u8 input[32] = { 0 }; u8 output[32]; u8 flags; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_EC, 
input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_EC, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; flags = output[MSI_PLATFORM_EC_FLAGS_OFFSET]; - dev_dbg(&wdev->dev, "EC RAM version %lu.%lu\n", + dev_dbg(&data->wdev->dev, "EC RAM version %lu.%lu\n", FIELD_GET(MSI_PLATFORM_EC_MAJOR_MASK, flags), FIELD_GET(MSI_PLATFORM_EC_MINOR_MASK, flags)); - dev_dbg(&wdev->dev, "EC firmware version %.28s\n", + dev_dbg(&data->wdev->dev, "EC firmware version %.28s\n", &output[MSI_PLATFORM_EC_VERSION_OFFSET]); if (!(flags & MSI_PLATFORM_EC_IS_TIGERLAKE)) { if (!force) return -ENODEV; - dev_warn(&wdev->dev, "Loading on a non-Tigerlake platform\n"); + dev_warn(&data->wdev->dev, "Loading on a non-Tigerlake platform\n"); } return 0; } -static int msi_wmi_platform_init(struct wmi_device *wdev) +static int msi_wmi_platform_init(struct msi_wmi_platform_data *data) { u8 input[32] = { 0 }; u8 output[32]; int ret; - ret = msi_wmi_platform_query(wdev, MSI_PLATFORM_GET_WMI, input, sizeof(input), output, + ret = msi_wmi_platform_query(data, MSI_PLATFORM_GET_WMI, input, sizeof(input), output, sizeof(output)); if (ret < 0) return ret; - dev_dbg(&wdev->dev, "WMI interface version %u.%u\n", + dev_dbg(&data->wdev->dev, "WMI interface version %u.%u\n", output[MSI_PLATFORM_WMI_MAJOR_OFFSET], output[MSI_PLATFORM_WMI_MINOR_OFFSET]); @@ -381,7 +395,8 @@ static int msi_wmi_platform_init(struct wmi_device *wdev) if (!force) return -ENODEV; - dev_warn(&wdev->dev, "Loading despite unsupported WMI interface version (%u.%u)\n", + dev_warn(&data->wdev->dev, + "Loading despite unsupported WMI interface version (%u.%u)\n", output[MSI_PLATFORM_WMI_MAJOR_OFFSET], output[MSI_PLATFORM_WMI_MINOR_OFFSET]); } @@ -391,19 +406,31 @@ static int msi_wmi_platform_init(struct wmi_device *wdev) static int msi_wmi_platform_probe(struct wmi_device *wdev, const void *context) { + struct msi_wmi_platform_data *data; int ret; - ret = msi_wmi_platform_init(wdev); + data = 
devm_kzalloc(&wdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->wdev = wdev; + dev_set_drvdata(&wdev->dev, data); + + ret = devm_mutex_init(&wdev->dev, &data->wmi_lock); + if (ret < 0) + return ret; + + ret = msi_wmi_platform_init(data); if (ret < 0) return ret; - ret = msi_wmi_platform_ec_init(wdev); + ret = msi_wmi_platform_ec_init(data); if (ret < 0) return ret; - msi_wmi_platform_debugfs_init(wdev); + msi_wmi_platform_debugfs_init(data); - return msi_wmi_platform_hwmon_init(wdev); + return msi_wmi_platform_hwmon_init(data); } static const struct wmi_device_id msi_wmi_platform_id_table[] = { diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 0fc275e461be40..00b1e7c79a3d1e 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1061,8 +1061,8 @@ static ssize_t current_value_store(struct kobject *kobj, ret = -EINVAL; goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, tlmi_priv.pwd_admin->signature); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, tlmi_priv.pwd_admin->signature); if (!set_str) { ret = -ENOMEM; goto out; @@ -1092,7 +1092,7 @@ static ssize_t current_value_store(struct kobject *kobj, goto out; } - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, new_setting); if (!set_str) { ret = -ENOMEM; @@ -1120,11 +1120,11 @@ static ssize_t current_value_store(struct kobject *kobj, } if (auth_str) - set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->display_name, - new_setting, auth_str); + set_str = kasprintf(GFP_KERNEL, "%s,%s,%s", setting->name, + new_setting, auth_str); else - set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, - new_setting); + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->name, + new_setting); if (!set_str) { ret = -ENOMEM; goto out; @@ -1629,9 +1629,6 @@ static int 
tlmi_analyze(struct wmi_device *wdev) continue; } - /* It is not allowed to have '/' for file name. Convert it into '\'. */ - strreplace(item, '/', '\\'); - /* Remove the value part */ strreplace(item, ',', '\0'); @@ -1644,11 +1641,16 @@ static int tlmi_analyze(struct wmi_device *wdev) } setting->wdev = wdev; setting->index = i; + + strscpy(setting->name, item); + /* It is not allowed to have '/' for file name. Convert it into '\'. */ + strreplace(item, '/', '\\'); strscpy(setting->display_name, item); + /* If BIOS selections supported, load those */ if (tlmi_priv.can_get_bios_selections) { - ret = tlmi_get_bios_selections(setting->display_name, - &setting->possible_values); + ret = tlmi_get_bios_selections(setting->name, + &setting->possible_values); if (ret || !setting->possible_values) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index a8045248222729..9b014644d31610 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -90,6 +90,7 @@ struct tlmi_attr_setting { struct kobject kobj; struct wmi_device *wdev; int index; + char name[TLMI_SETTINGS_MAXLEN]; char display_name[TLMI_SETTINGS_MAXLEN]; char *possible_values; }; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5790095c175e6f..657625dd60a06c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -231,6 +231,7 @@ enum tpacpi_hkey_event_t { /* Thermal events */ TP_HKEY_EV_ALARM_BAT_HOT = 0x6011, /* battery too hot */ TP_HKEY_EV_ALARM_BAT_XHOT = 0x6012, /* battery critically hot */ + TP_HKEY_EV_ALARM_BAT_LIM_CHANGE = 0x6013, /* battery charge limit changed*/ TP_HKEY_EV_ALARM_SENSOR_HOT = 0x6021, /* sensor too hot */ TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */ TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* windows; thermal table changed */ @@ -3777,6 
+3778,10 @@ static bool hotkey_notify_6xxx(const u32 hkey, bool *send_acpi_ev) pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; + case TP_HKEY_EV_ALARM_BAT_LIM_CHANGE: + pr_debug("Battery Info: battery charge threshold changed\n"); + /* User changed charging threshold. No action needed */ + return true; case TP_HKEY_EV_ALARM_SENSOR_HOT: pr_crit("THERMAL ALARM: a sensor reports something is too hot!\n"); /* recommended action: warn user through gui, that */ @@ -11478,6 +11483,8 @@ static int __must_check __init get_thinkpad_model_data( tp->vendor = PCI_VENDOR_ID_IBM; else if (dmi_name_in_vendors("LENOVO")) tp->vendor = PCI_VENDOR_ID_LENOVO; + else if (dmi_name_in_vendors("NEC")) + tp->vendor = PCI_VENDOR_ID_LENOVO; else return 0; diff --git a/drivers/platform/x86/x86-android-tablets/dmi.c b/drivers/platform/x86/x86-android-tablets/dmi.c index 3e5fa3b6e2fdfe..278c6d151dc492 100644 --- a/drivers/platform/x86/x86-android-tablets/dmi.c +++ b/drivers/platform/x86/x86-android-tablets/dmi.c @@ -179,6 +179,18 @@ const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&peaq_c1010_info, }, + { + /* Vexia Edu Atla 10 tablet 5V version */ + .matches = { + /* Having all 3 of these not set is somewhat unique */ + DMI_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), + DMI_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), + DMI_MATCH(DMI_BOARD_NAME, "To be filled by O.E.M."), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "05/14/2015"), + }, + .driver_data = (void *)&vexia_edu_atla10_5v_info, + }, { /* Vexia Edu Atla 10 tablet 9V version */ .matches = { @@ -187,7 +199,7 @@ const struct dmi_system_id x86_android_tablet_ids[] __initconst = { /* Above strings are too generic, also match on BIOS date */ DMI_MATCH(DMI_BIOS_DATE, "08/25/2014"), }, - .driver_data = (void *)&vexia_edu_atla10_info, + .driver_data = (void 
*)&vexia_edu_atla10_9v_info, }, { /* Whitelabel (sold as various brands) TM800A550L */ diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c index 1d93d9edb23f48..f7bd9f863c85ed 100644 --- a/drivers/platform/x86/x86-android-tablets/other.c +++ b/drivers/platform/x86/x86-android-tablets/other.c @@ -599,62 +599,122 @@ const struct x86_dev_info whitelabel_tm800a550l_info __initconst = { }; /* - * Vexia EDU ATLA 10 tablet, Android 4.2 / 4.4 + Guadalinex Ubuntu tablet + * Vexia EDU ATLA 10 tablet 5V, Android 4.4 + Guadalinex Ubuntu tablet + * distributed to schools in the Spanish Andalucía region. + */ +static const struct property_entry vexia_edu_atla10_5v_touchscreen_props[] = { + PROPERTY_ENTRY_U32("hid-descr-addr", 0x0000), + PROPERTY_ENTRY_U32("post-reset-deassert-delay-ms", 120), + { } +}; + +static const struct software_node vexia_edu_atla10_5v_touchscreen_node = { + .properties = vexia_edu_atla10_5v_touchscreen_props, +}; + +static const struct x86_i2c_client_info vexia_edu_atla10_5v_i2c_clients[] __initconst = { + { + /* kxcjk1013 accelerometer */ + .board_info = { + .type = "kxcjk1013", + .addr = 0x0f, + .dev_name = "kxcjk1013", + }, + .adapter_path = "\\_SB_.I2C3", + }, { + /* touchscreen controller */ + .board_info = { + .type = "hid-over-i2c", + .addr = 0x38, + .dev_name = "FTSC1000", + .swnode = &vexia_edu_atla10_5v_touchscreen_node, + }, + .adapter_path = "\\_SB_.I2C4", + .irq_data = { + .type = X86_ACPI_IRQ_TYPE_APIC, + .index = 0x44, + .trigger = ACPI_LEVEL_SENSITIVE, + .polarity = ACPI_ACTIVE_HIGH, + }, + } +}; + +static struct gpiod_lookup_table vexia_edu_atla10_5v_ft5416_gpios = { + .dev_id = "i2c-FTSC1000", + .table = { + GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW), + { } + }, +}; + +static struct gpiod_lookup_table * const vexia_edu_atla10_5v_gpios[] = { + &vexia_edu_atla10_5v_ft5416_gpios, + NULL +}; + +const struct x86_dev_info vexia_edu_atla10_5v_info __initconst = { + 
.i2c_client_info = vexia_edu_atla10_5v_i2c_clients, + .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_5v_i2c_clients), + .gpiod_lookup_tables = vexia_edu_atla10_5v_gpios, +}; + +/* + * Vexia EDU ATLA 10 tablet 9V, Android 4.2 + Guadalinex Ubuntu tablet * distributed to schools in the Spanish Andalucía region. */ static const char * const crystal_cove_pwrsrc_psy[] = { "crystal_cove_pwrsrc" }; -static const struct property_entry vexia_edu_atla10_ulpmc_props[] = { +static const struct property_entry vexia_edu_atla10_9v_ulpmc_props[] = { PROPERTY_ENTRY_STRING_ARRAY("supplied-from", crystal_cove_pwrsrc_psy), { } }; -static const struct software_node vexia_edu_atla10_ulpmc_node = { - .properties = vexia_edu_atla10_ulpmc_props, +static const struct software_node vexia_edu_atla10_9v_ulpmc_node = { + .properties = vexia_edu_atla10_9v_ulpmc_props, }; -static const char * const vexia_edu_atla10_accel_mount_matrix[] = { +static const char * const vexia_edu_atla10_9v_accel_mount_matrix[] = { "0", "-1", "0", "1", "0", "0", "0", "0", "1" }; -static const struct property_entry vexia_edu_atla10_accel_props[] = { - PROPERTY_ENTRY_STRING_ARRAY("mount-matrix", vexia_edu_atla10_accel_mount_matrix), +static const struct property_entry vexia_edu_atla10_9v_accel_props[] = { + PROPERTY_ENTRY_STRING_ARRAY("mount-matrix", vexia_edu_atla10_9v_accel_mount_matrix), { } }; -static const struct software_node vexia_edu_atla10_accel_node = { - .properties = vexia_edu_atla10_accel_props, +static const struct software_node vexia_edu_atla10_9v_accel_node = { + .properties = vexia_edu_atla10_9v_accel_props, }; -static const struct property_entry vexia_edu_atla10_touchscreen_props[] = { +static const struct property_entry vexia_edu_atla10_9v_touchscreen_props[] = { PROPERTY_ENTRY_U32("hid-descr-addr", 0x0000), PROPERTY_ENTRY_U32("post-reset-deassert-delay-ms", 120), { } }; -static const struct software_node vexia_edu_atla10_touchscreen_node = { - .properties = vexia_edu_atla10_touchscreen_props, +static 
const struct software_node vexia_edu_atla10_9v_touchscreen_node = { + .properties = vexia_edu_atla10_9v_touchscreen_props, }; -static const struct property_entry vexia_edu_atla10_pmic_props[] = { +static const struct property_entry vexia_edu_atla10_9v_pmic_props[] = { PROPERTY_ENTRY_BOOL("linux,register-pwrsrc-power_supply"), { } }; -static const struct software_node vexia_edu_atla10_pmic_node = { - .properties = vexia_edu_atla10_pmic_props, +static const struct software_node vexia_edu_atla10_9v_pmic_node = { + .properties = vexia_edu_atla10_9v_pmic_props, }; -static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initconst = { +static const struct x86_i2c_client_info vexia_edu_atla10_9v_i2c_clients[] __initconst = { { /* I2C attached embedded controller, used to access fuel-gauge */ .board_info = { .type = "vexia_atla10_ec", .addr = 0x76, .dev_name = "ulpmc", - .swnode = &vexia_edu_atla10_ulpmc_node, + .swnode = &vexia_edu_atla10_9v_ulpmc_node, }, .adapter_path = "0000:00:18.1", }, { @@ -679,7 +739,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "kxtj21009", .addr = 0x0f, .dev_name = "kxtj21009", - .swnode = &vexia_edu_atla10_accel_node, + .swnode = &vexia_edu_atla10_9v_accel_node, }, .adapter_path = "0000:00:18.5", }, { @@ -688,7 +748,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "hid-over-i2c", .addr = 0x38, .dev_name = "FTSC1000", - .swnode = &vexia_edu_atla10_touchscreen_node, + .swnode = &vexia_edu_atla10_9v_touchscreen_node, }, .adapter_path = "0000:00:18.6", .irq_data = { @@ -703,7 +763,7 @@ static const struct x86_i2c_client_info vexia_edu_atla10_i2c_clients[] __initcon .type = "intel_soc_pmic_crc", .addr = 0x6e, .dev_name = "intel_soc_pmic_crc", - .swnode = &vexia_edu_atla10_pmic_node, + .swnode = &vexia_edu_atla10_9v_pmic_node, }, .adapter_path = "0000:00:18.7", .irq_data = { @@ -715,7 +775,7 @@ static const struct x86_i2c_client_info 
vexia_edu_atla10_i2c_clients[] __initcon } }; -static const struct x86_serdev_info vexia_edu_atla10_serdevs[] __initconst = { +static const struct x86_serdev_info vexia_edu_atla10_9v_serdevs[] __initconst = { { .ctrl.pci.devfn = PCI_DEVFN(0x1e, 3), .ctrl_devname = "serial0", @@ -723,7 +783,7 @@ static const struct x86_serdev_info vexia_edu_atla10_serdevs[] __initconst = { }, }; -static struct gpiod_lookup_table vexia_edu_atla10_ft5416_gpios = { +static struct gpiod_lookup_table vexia_edu_atla10_9v_ft5416_gpios = { .dev_id = "i2c-FTSC1000", .table = { GPIO_LOOKUP("INT33FC:00", 60, "reset", GPIO_ACTIVE_LOW), @@ -731,12 +791,12 @@ static struct gpiod_lookup_table vexia_edu_atla10_ft5416_gpios = { }, }; -static struct gpiod_lookup_table * const vexia_edu_atla10_gpios[] = { - &vexia_edu_atla10_ft5416_gpios, +static struct gpiod_lookup_table * const vexia_edu_atla10_9v_gpios[] = { + &vexia_edu_atla10_9v_ft5416_gpios, NULL }; -static int __init vexia_edu_atla10_init(struct device *dev) +static int __init vexia_edu_atla10_9v_init(struct device *dev) { struct pci_dev *pdev; int ret; @@ -760,13 +820,13 @@ static int __init vexia_edu_atla10_init(struct device *dev) return 0; } -const struct x86_dev_info vexia_edu_atla10_info __initconst = { - .i2c_client_info = vexia_edu_atla10_i2c_clients, - .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_i2c_clients), - .serdev_info = vexia_edu_atla10_serdevs, - .serdev_count = ARRAY_SIZE(vexia_edu_atla10_serdevs), - .gpiod_lookup_tables = vexia_edu_atla10_gpios, - .init = vexia_edu_atla10_init, +const struct x86_dev_info vexia_edu_atla10_9v_info __initconst = { + .i2c_client_info = vexia_edu_atla10_9v_i2c_clients, + .i2c_client_count = ARRAY_SIZE(vexia_edu_atla10_9v_i2c_clients), + .serdev_info = vexia_edu_atla10_9v_serdevs, + .serdev_count = ARRAY_SIZE(vexia_edu_atla10_9v_serdevs), + .gpiod_lookup_tables = vexia_edu_atla10_9v_gpios, + .init = vexia_edu_atla10_9v_init, .use_pci = true, }; diff --git 
a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h index 63a38a0069bae6..dcf8d49e3b5f48 100644 --- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h +++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h @@ -127,7 +127,8 @@ extern const struct x86_dev_info nextbook_ares8_info; extern const struct x86_dev_info nextbook_ares8a_info; extern const struct x86_dev_info peaq_c1010_info; extern const struct x86_dev_info whitelabel_tm800a550l_info; -extern const struct x86_dev_info vexia_edu_atla10_info; +extern const struct x86_dev_info vexia_edu_atla10_5v_info; +extern const struct x86_dev_info vexia_edu_atla10_9v_info; extern const struct x86_dev_info xiaomi_mipad2_info; extern const struct dmi_system_id x86_android_tablet_ids[]; diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 9b2f28b34bb51a..d6c1ddb807b208 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3126,7 +3126,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, /* Verify that the index is within a valid range. */ num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", "#power-domain-cells"); - if (index >= num_domains) + if (num_domains < 0 || index >= num_domains) return NULL; /* Allocate and register device on the genpd bus. 
*/ diff --git a/drivers/pmdomain/renesas/rcar-gen4-sysc.c b/drivers/pmdomain/renesas/rcar-gen4-sysc.c index 66409cff2083fc..e001b5c25bed00 100644 --- a/drivers/pmdomain/renesas/rcar-gen4-sysc.c +++ b/drivers/pmdomain/renesas/rcar-gen4-sysc.c @@ -338,11 +338,6 @@ static int __init rcar_gen4_sysc_pd_init(void) struct rcar_gen4_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { diff --git a/drivers/pmdomain/renesas/rcar-sysc.c b/drivers/pmdomain/renesas/rcar-sysc.c index dce1a6d37e8012..047495f54e8adc 100644 --- a/drivers/pmdomain/renesas/rcar-sysc.c +++ b/drivers/pmdomain/renesas/rcar-sysc.c @@ -396,11 +396,6 @@ static int __init rcar_sysc_pd_init(void) struct rcar_sysc_pd *pd; size_t n; - if (!area->name) { - /* Skip NULLified area */ - continue; - } - n = strlen(area->name) + 1; pd = kzalloc(sizeof(*pd) + n, GFP_KERNEL); if (!pd) { diff --git a/drivers/power/reset/at91-reset.c b/drivers/power/reset/at91-reset.c index 036b18a1f90f80..511f5a8f8961ce 100644 --- a/drivers/power/reset/at91-reset.c +++ b/drivers/power/reset/at91-reset.c @@ -129,12 +129,11 @@ static int at91_reset(struct notifier_block *this, unsigned long mode, " str %4, [%0, %6]\n\t" /* Disable SDRAM1 accesses */ "1: tst %1, #0\n\t" - " beq 2f\n\t" " strne %3, [%1, #" __stringify(AT91_DDRSDRC_RTR) "]\n\t" /* Power down SDRAM1 */ " strne %4, [%1, %6]\n\t" /* Reset CPU */ - "2: str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" + " str %5, [%2, #" __stringify(AT91_RSTC_CR) "]\n\t" " b .\n\t" : @@ -145,7 +144,7 @@ static int at91_reset(struct notifier_block *this, unsigned long mode, "r" cpu_to_le32(AT91_DDRSDRC_LPCB_POWER_DOWN), "r" (reset->data->reset_args), "r" (reset->ramc_lpr) - : "r4"); + ); return NOTIFY_DONE; } diff --git a/drivers/power/supply/max77705_charger.c b/drivers/power/supply/max77705_charger.c index eec5e9ef795efd..329b430d0e5065 100644 --- 
a/drivers/power/supply/max77705_charger.c +++ b/drivers/power/supply/max77705_charger.c @@ -545,20 +545,28 @@ static int max77705_charger_probe(struct i2c_client *i2c) return dev_err_probe(dev, ret, "failed to add irq chip\n"); chg->wqueue = create_singlethread_workqueue(dev_name(dev)); - if (IS_ERR(chg->wqueue)) - return dev_err_probe(dev, PTR_ERR(chg->wqueue), "failed to create workqueue\n"); + if (!chg->wqueue) + return dev_err_probe(dev, -ENOMEM, "failed to create workqueue\n"); ret = devm_work_autocancel(dev, &chg->chgin_work, max77705_chgin_isr_work); - if (ret) - return dev_err_probe(dev, ret, "failed to initialize interrupt work\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to initialize interrupt work\n"); + goto destroy_wq; + } max77705_charger_initialize(chg); ret = max77705_charger_enable(chg); - if (ret) - return dev_err_probe(dev, ret, "failed to enable charge\n"); + if (ret) { + dev_err_probe(dev, ret, "failed to enable charge\n"); + goto destroy_wq; + } return devm_add_action_or_reset(dev, max77705_charger_disable, chg); + +destroy_wq: + destroy_workqueue(chg->wqueue); + return ret; } static const struct of_device_id max77705_charger_of_match[] = { diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index edb058c19c9c44..439dd0bf8644e5 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -33,7 +33,7 @@ struct power_supply_attr { [POWER_SUPPLY_PROP_ ## _name] = \ { \ .prop_name = #_name, \ - .attr_name = #_name "\0", \ + .attr_name = #_name, \ .text_values = _text, \ .text_values_len = _len, \ } diff --git a/drivers/pps/generators/pps_gen_tio.c b/drivers/pps/generators/pps_gen_tio.c index 1d5ffe055463fc..de00a85bfafa2d 100644 --- a/drivers/pps/generators/pps_gen_tio.c +++ b/drivers/pps/generators/pps_gen_tio.c @@ -230,7 +230,7 @@ static int pps_gen_tio_probe(struct platform_device *pdev) hrtimer_setup(&tio->timer, hrtimer_callback, 
CLOCK_REALTIME, HRTIMER_MODE_ABS); spin_lock_init(&tio->lock); - platform_set_drvdata(pdev, &tio); + platform_set_drvdata(pdev, tio); return 0; } diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 7945c6be1f7ce4..e63481f24238da 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -315,6 +315,8 @@ struct ptp_ocp_serial_port { #define OCP_BOARD_ID_LEN 13 #define OCP_SERIAL_LEN 6 #define OCP_SMA_NUM 4 +#define OCP_SIGNAL_NUM 4 +#define OCP_FREQ_NUM 4 enum { PORT_GNSS, @@ -342,8 +344,8 @@ struct ptp_ocp { struct dcf_master_reg __iomem *dcf_out; struct dcf_slave_reg __iomem *dcf_in; struct tod_reg __iomem *nmea_out; - struct frequency_reg __iomem *freq_in[4]; - struct ptp_ocp_ext_src *signal_out[4]; + struct frequency_reg __iomem *freq_in[OCP_FREQ_NUM]; + struct ptp_ocp_ext_src *signal_out[OCP_SIGNAL_NUM]; struct ptp_ocp_ext_src *pps; struct ptp_ocp_ext_src *ts0; struct ptp_ocp_ext_src *ts1; @@ -378,10 +380,12 @@ struct ptp_ocp { u32 utc_tai_offset; u32 ts_window_adjust; u64 fw_cap; - struct ptp_ocp_signal signal[4]; + struct ptp_ocp_signal signal[OCP_SIGNAL_NUM]; struct ptp_ocp_sma_connector sma[OCP_SMA_NUM]; const struct ocp_sma_op *sma_op; struct dpll_device *dpll; + int signals_nr; + int freq_in_nr; }; #define OCP_REQ_TIMESTAMP BIT(0) @@ -2067,6 +2071,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) if (!s->start) { /* roundup() does not work on 32-bit systems */ s->start = DIV64_U64_ROUND_UP(start_ns, s->period); + s->start *= s->period; s->start = ktime_add(s->start, s->phase); } @@ -2577,12 +2582,60 @@ static const struct ocp_sma_op ocp_fb_sma_op = { .set_output = ptp_ocp_sma_fb_set_output, }; +static int +ptp_ocp_sma_adva_set_output(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map1->gpio2 : &bp->sma_map2->gpio2; + shift = sma_nr & 1 ? 
0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + +static int +ptp_ocp_sma_adva_set_inputs(struct ptp_ocp *bp, int sma_nr, u32 val) +{ + u32 reg, mask, shift; + unsigned long flags; + u32 __iomem *gpio; + + gpio = sma_nr > 2 ? &bp->sma_map2->gpio1 : &bp->sma_map1->gpio1; + shift = sma_nr & 1 ? 0 : 16; + + mask = 0xffff << (16 - shift); + + spin_lock_irqsave(&bp->lock, flags); + + reg = ioread32(gpio); + reg = (reg & mask) | (val << shift); + + iowrite32(reg, gpio); + + spin_unlock_irqrestore(&bp->lock, flags); + + return 0; +} + static const struct ocp_sma_op ocp_adva_sma_op = { .tbl = { ptp_ocp_adva_sma_in, ptp_ocp_adva_sma_out }, .init = ptp_ocp_sma_fb_init, .get = ptp_ocp_sma_fb_get, - .set_inputs = ptp_ocp_sma_fb_set_inputs, - .set_output = ptp_ocp_sma_fb_set_output, + .set_inputs = ptp_ocp_sma_adva_set_inputs, + .set_output = ptp_ocp_sma_adva_set_output, }; static int @@ -2648,6 +2701,8 @@ ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->eeprom_map = fb_eeprom_map; bp->fw_version = ioread32(&bp->image->version); bp->sma_op = &ocp_fb_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; ptp_ocp_fb_set_version(bp); @@ -2813,6 +2868,8 @@ ptp_ocp_art_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->fw_version = ioread32(&bp->reg->version); bp->fw_tag = 2; bp->sma_op = &ocp_art_sma_op; + bp->signals_nr = 4; + bp->freq_in_nr = 4; /* Enable MAC serial port during initialisation */ iowrite32(1, &bp->board_config->mro50_serial_activate); @@ -2839,6 +2896,8 @@ ptp_ocp_adva_board_init(struct ptp_ocp *bp, struct ocp_resource *r) bp->flash_start = 0xA00000; bp->eeprom_map = fb_eeprom_map; bp->sma_op = &ocp_adva_sma_op; + bp->signals_nr = 2; + bp->freq_in_nr = 2; version = ioread32(&bp->image->version); /* if lower 16 bits are empty, this is the fw loader. 
*/ @@ -3959,7 +4018,7 @@ _signal_summary_show(struct seq_file *s, struct ptp_ocp *bp, int nr) { struct signal_reg __iomem *reg = bp->signal_out[nr]->mem; struct ptp_ocp_signal *signal = &bp->signal[nr]; - char label[8]; + char label[16]; bool on; u32 val; @@ -3982,7 +4041,7 @@ static void _frequency_summary_show(struct seq_file *s, int nr, struct frequency_reg __iomem *reg) { - char label[8]; + char label[16]; bool on; u32 val; @@ -4126,11 +4185,11 @@ ptp_ocp_summary_show(struct seq_file *s, void *data) } if (bp->fw_cap & OCP_CAP_SIGNAL) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->signals_nr; i++) _signal_summary_show(s, bp, i); if (bp->fw_cap & OCP_CAP_FREQ) - for (i = 0; i < 4; i++) + for (i = 0; i < bp->freq_in_nr; i++) _frequency_summary_show(s, i, bp->freq_in[i]); if (bp->irig_out) { diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 18934e28469ee6..528d86a33f37de 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,17 +98,7 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - bool in_use = false; - - if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) - return true; - - if (!ptp->is_virtual_clock && ptp->n_vclocks) - in_use = true; - - mutex_unlock(&ptp->n_vclocks_mux); - - return in_use; + return !ptp->is_virtual_clock; } /* Check if ptp clock shall be free running */ diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index a40c511e009652..0387bd838487b1 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -322,7 +322,7 @@ static int __pwm_set_waveform(struct pwm_device *pwm, const struct pwm_ops *ops = chip->ops; char wfhw[WFHWSIZE]; struct pwm_waveform wf_rounded; - int err; + int err, ret_tohw; BUG_ON(WFHWSIZE < ops->sizeof_wfhw); @@ -332,16 +332,16 @@ static int __pwm_set_waveform(struct pwm_device *pwm, if (!pwm_wf_valid(wf)) return -EINVAL; - err = 
__pwm_round_waveform_tohw(chip, pwm, wf, &wfhw); - if (err) - return err; + ret_tohw = __pwm_round_waveform_tohw(chip, pwm, wf, &wfhw); + if (ret_tohw < 0) + return ret_tohw; if ((IS_ENABLED(CONFIG_PWM_DEBUG) || exact) && wf->period_length_ns) { err = __pwm_round_waveform_fromhw(chip, pwm, &wfhw, &wf_rounded); if (err) return err; - if (IS_ENABLED(CONFIG_PWM_DEBUG) && !pwm_check_rounding(wf, &wf_rounded)) + if (IS_ENABLED(CONFIG_PWM_DEBUG) && ret_tohw == 0 && !pwm_check_rounding(wf, &wf_rounded)) dev_err(&chip->dev, "Wrong rounding: requested %llu/%llu [+%llu], result %llu/%llu [+%llu]\n", wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns); @@ -382,7 +382,8 @@ static int __pwm_set_waveform(struct pwm_device *pwm, wf_rounded.duty_length_ns, wf_rounded.period_length_ns, wf_rounded.duty_offset_ns, wf_set.duty_length_ns, wf_set.period_length_ns, wf_set.duty_offset_ns); } - return 0; + + return ret_tohw; } /** diff --git a/drivers/pwm/pwm-axi-pwmgen.c b/drivers/pwm/pwm-axi-pwmgen.c index 4259a0db9ff458..4337c8f5acf055 100644 --- a/drivers/pwm/pwm-axi-pwmgen.c +++ b/drivers/pwm/pwm-axi-pwmgen.c @@ -75,6 +75,7 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, { struct axi_pwmgen_waveform *wfhw = _wfhw; struct axi_pwmgen_ddata *ddata = axi_pwmgen_ddata_from_chip(chip); + int ret = 0; if (wf->period_length_ns == 0) { *wfhw = (struct axi_pwmgen_waveform){ @@ -91,12 +92,15 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, if (wfhw->period_cnt == 0) { /* * The specified period is too short for the hardware. - * Let's round .duty_cycle down to 0 to get a (somewhat) - * valid result. + * So round up .period_cnt to 1 (i.e. the smallest + * possible period). With .duty_cycle and .duty_offset + * being less than or equal to .period, their rounded + * value must be 0. 
*/ wfhw->period_cnt = 1; wfhw->duty_cycle_cnt = 0; wfhw->duty_offset_cnt = 0; + ret = 1; } else { wfhw->duty_cycle_cnt = min_t(u64, mul_u64_u32_div(wf->duty_length_ns, ddata->clk_rate_hz, NSEC_PER_SEC), @@ -111,7 +115,7 @@ static int axi_pwmgen_round_waveform_tohw(struct pwm_chip *chip, pwm->hwpwm, wf->duty_length_ns, wf->period_length_ns, wf->duty_offset_ns, ddata->clk_rate_hz, wfhw->period_cnt, wfhw->duty_cycle_cnt, wfhw->duty_offset_cnt); - return 0; + return ret; } static int axi_pwmgen_round_waveform_fromhw(struct pwm_chip *chip, struct pwm_device *pwm, diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index 2510c10ca47303..c45a5fca4cbbd2 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -118,6 +118,9 @@ static unsigned int fsl_pwm_ticks_to_ns(struct fsl_pwm_chip *fpc, unsigned long long exval; rate = clk_get_rate(fpc->clk[fpc->period.clk_select]); + if (rate >> fpc->period.clk_ps == 0) + return 0; + exval = ticks; exval *= 1000000000UL; do_div(exval, rate >> fpc->period.clk_ps); @@ -190,6 +193,9 @@ static unsigned int fsl_pwm_calculate_duty(struct fsl_pwm_chip *fpc, unsigned int period = fpc->period.mod_period + 1; unsigned int period_ns = fsl_pwm_ticks_to_ns(fpc, period); + if (!period_ns) + return 0; + duty = (unsigned long long)duty_ns * period; do_div(duty, period_ns); diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c index 01dfa0fab80a44..7eaab58314995c 100644 --- a/drivers/pwm/pwm-mediatek.c +++ b/drivers/pwm/pwm-mediatek.c @@ -121,21 +121,25 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_mediatek_chip *pc = to_pwm_mediatek_chip(chip); u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH, reg_thres = PWMTHRES; + unsigned long clk_rate; u64 resolution; int ret; ret = pwm_mediatek_clk_enable(chip, pwm); - if (ret < 0) return ret; + clk_rate = clk_get_rate(pc->clk_pwms[pwm->hwpwm]); + if (!clk_rate) + return -EINVAL; + /* Make sure we use the bus 
clock and not the 26MHz clock */ if (pc->soc->has_ck_26m_sel) writel(0, pc->regs + PWM_CK_26M_SEL); /* Using resolution in picosecond gets accuracy higher */ resolution = (u64)NSEC_PER_SEC * 1000; - do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm])); + do_div(resolution, clk_rate); cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution); while (cnt_period > 8191) { diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c index 2261789cc27dae..578dbdd2d5a721 100644 --- a/drivers/pwm/pwm-rcar.c +++ b/drivers/pwm/pwm-rcar.c @@ -8,6 +8,7 @@ * - The hardware cannot generate a 0% duty cycle. */ +#include #include #include #include @@ -102,23 +103,24 @@ static void rcar_pwm_set_clock_control(struct rcar_pwm_chip *rp, rcar_pwm_write(rp, value, RCAR_PWMCR); } -static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, int duty_ns, - int period_ns) +static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, u64 duty_ns, + u64 period_ns) { - unsigned long long one_cycle, tmp; /* 0.01 nanoseconds */ + unsigned long long tmp; unsigned long clk_rate = clk_get_rate(rp->clk); u32 cyc, ph; - one_cycle = NSEC_PER_SEC * 100ULL << div; - do_div(one_cycle, clk_rate); + /* div <= 24 == RCAR_PWM_MAX_DIVISION, so the shift doesn't overflow. 
*/ + tmp = mul_u64_u64_div_u64(period_ns, clk_rate, (u64)NSEC_PER_SEC << div); + if (tmp > FIELD_MAX(RCAR_PWMCNT_CYC0_MASK)) + tmp = FIELD_MAX(RCAR_PWMCNT_CYC0_MASK); - tmp = period_ns * 100ULL; - do_div(tmp, one_cycle); - cyc = (tmp << RCAR_PWMCNT_CYC0_SHIFT) & RCAR_PWMCNT_CYC0_MASK; + cyc = FIELD_PREP(RCAR_PWMCNT_CYC0_MASK, tmp); - tmp = duty_ns * 100ULL; - do_div(tmp, one_cycle); - ph = tmp & RCAR_PWMCNT_PH0_MASK; + tmp = mul_u64_u64_div_u64(duty_ns, clk_rate, (u64)NSEC_PER_SEC << div); + if (tmp > FIELD_MAX(RCAR_PWMCNT_PH0_MASK)) + tmp = FIELD_MAX(RCAR_PWMCNT_PH0_MASK); + ph = FIELD_PREP(RCAR_PWMCNT_PH0_MASK, tmp); /* Avoid prohibited setting */ if (cyc == 0 || ph == 0) diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c index a59de4de18b6e9..ec2c05c9ee7a67 100644 --- a/drivers/pwm/pwm-stm32.c +++ b/drivers/pwm/pwm-stm32.c @@ -103,22 +103,16 @@ static int stm32_pwm_round_waveform_tohw(struct pwm_chip *chip, if (ret) goto out; - /* - * calculate the best value for ARR for the given PSC, refuse if - * the resulting period gets bigger than the requested one. - */ arr = mul_u64_u64_div_u64(wf->period_length_ns, rate, (u64)NSEC_PER_SEC * (wfhw->psc + 1)); if (arr <= wfhw->arr) { /* - * requested period is small than the currently + * requested period is smaller than the currently * configured and unchangable period, report back the smallest - * possible period, i.e. the current state; Initialize - * ccr to anything valid. + * possible period, i.e. the current state and return 1 + * to indicate the wrong rounding direction. 
*/ - wfhw->ccr = 0; ret = 1; - goto out; } } else { diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index f9be26d2534846..d096b58cd0ae97 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -362,4 +362,7 @@ static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); } +#define MI300_UMC_MCA_COL GENMASK(5, 1) +#define MI300_UMC_MCA_ROW13 BIT(23) + #endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index dc8aa12f63c811..6e072b7667e98b 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -229,7 +229,6 @@ int get_umc_info_mi300(void) * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ -#define MI300_UMC_MCA_COL GENMASK(5, 1) #define MI300_UMC_MCA_BANK GENMASK(9, 6) #define MI300_UMC_MCA_ROW GENMASK(24, 10) #define MI300_UMC_MCA_PC BIT(25) @@ -320,7 +319,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. */ #define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) -static void retire_row_mi300(struct atl_err *a_err) +static void _retire_row_mi300(struct atl_err *a_err) { unsigned long addr; struct page *p; @@ -351,6 +350,22 @@ static void retire_row_mi300(struct atl_err *a_err) } } +/* + * In addition to the column bits, the row[13] bit should also be included when + * calculating addresses affected by a physical row. + * + * Instead of running through another loop over a single bit, just run through + * the column bits twice and flip the row[13] bit in-between. + * + * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. 
+ */ +static void retire_row_mi300(struct atl_err *a_err) +{ + _retire_row_mi300(a_err); + a_err->addr ^= MI300_UMC_MCA_ROW13; + _retire_row_mi300(a_err); +} + void amd_retire_dram_row(struct atl_err *a_err) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 90de737fbc9097..8877c6ff64c468 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec) return true; } +/* + * Row retirement is done on MI300 systems, and some bits are 'don't + * care' for comparing addresses with unique physical rows. This + * includes all column bits and the row[13] bit. + */ +#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL)) + static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) { /* @@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_ * * Also, order the checks from most->least likely to fail to shortcut the code. */ - if (old->addr != new->addr) + if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr)) return false; if (old->hw_id != new->hw_id) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index 59eb23d467ec05..3d333b61fb18c8 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -5,6 +5,7 @@ // Copyright (C) 2022 Laurent Pinchart // Copyright (C) 2018 Avnet, Inc. 
+#include #include #include #include @@ -132,23 +133,27 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { - struct of_regulator_match matches[MAX20086_MAX_REGULATORS] = { }; - struct device_node *node; + struct of_regulator_match *matches; unsigned int i; int ret; - node = of_get_child_by_name(chip->dev->of_node, "regulators"); + struct device_node *node __free(device_node) = + of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); return -ENODEV; } + matches = devm_kcalloc(chip->dev, chip->info->num_outputs, + sizeof(*matches), GFP_KERNEL); + if (!matches) + return -ENOMEM; + for (i = 0; i < chip->info->num_outputs; ++i) matches[i].name = max20086_output_names[i]; ret = of_regulator_match(chip->dev, node, matches, chip->info->num_outputs); - of_node_put(node); if (ret < 0) { dev_err(chip->dev, "Failed to match regulators\n"); return -EINVAL; diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 775b056d795a8b..2c7e519a2254ba 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -456,7 +456,8 @@ static int wcnss_init_regulators(struct qcom_wcnss *wcnss, if (wcnss->num_pds) { info += wcnss->num_pds; /* Handle single power domain case */ - num_vregs += num_pd_vregs - wcnss->num_pds; + if (wcnss->num_pds < num_pd_vregs) + num_vregs += num_pd_vregs - wcnss->num_pds; } else { num_vregs += num_pd_vregs; } diff --git a/drivers/remoteproc/qcom_wcnss_iris.c b/drivers/remoteproc/qcom_wcnss_iris.c index b989718776bdb5..2b52b403eb3f76 100644 --- a/drivers/remoteproc/qcom_wcnss_iris.c +++ b/drivers/remoteproc/qcom_wcnss_iris.c @@ -196,6 +196,7 @@ struct qcom_iris *qcom_iris_probe(struct device *parent, bool *use_48mhz_xo) err_device_del: device_del(&iris->dev); + put_device(&iris->dev); return ERR_PTR(ret); } @@ -203,4 +204,5 @@ struct qcom_iris 
*qcom_iris_probe(struct device *parent, bool *use_48mhz_xo) void qcom_iris_remove(struct qcom_iris *iris) { device_del(&iris->dev); + put_device(&iris->dev); } diff --git a/drivers/remoteproc/ti_k3_dsp_remoteproc.c b/drivers/remoteproc/ti_k3_dsp_remoteproc.c index a695890254ff76..35e8c3cc313c36 100644 --- a/drivers/remoteproc/ti_k3_dsp_remoteproc.c +++ b/drivers/remoteproc/ti_k3_dsp_remoteproc.c @@ -115,10 +115,6 @@ static void k3_dsp_rproc_mbox_callback(struct mbox_client *client, void *data) const char *name = kproc->rproc->name; u32 msg = omap_mbox_message(data); - /* Do not forward messages from a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - dev_dbg(dev, "mbox msg: 0x%x\n", msg); switch (msg) { @@ -159,10 +155,6 @@ static void k3_dsp_rproc_kick(struct rproc *rproc, int vqid) mbox_msg_t msg = (mbox_msg_t)vqid; int ret; - /* Do not forward messages to a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - /* send the index of the triggered virtqueue in the mailbox payload */ ret = mbox_send_message(kproc->mbox, (void *)msg); if (ret < 0) diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index dbc513c5569cbf..ba082ca13e7508 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -194,10 +194,6 @@ static void k3_r5_rproc_mbox_callback(struct mbox_client *client, void *data) const char *name = kproc->rproc->name; u32 msg = omap_mbox_message(data); - /* Do not forward message from a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - dev_dbg(dev, "mbox msg: 0x%x\n", msg); switch (msg) { @@ -233,10 +229,6 @@ static void k3_r5_rproc_kick(struct rproc *rproc, int vqid) mbox_msg_t msg = (mbox_msg_t)vqid; int ret; - /* Do not forward message to a detached core */ - if (kproc->rproc->state == RPROC_DETACHED) - return; - /* send the index of the triggered virtqueue in the mailbox payload */ ret = 
mbox_send_message(kproc->mbox, (void *)msg); if (ret < 0) @@ -448,13 +440,36 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct k3_r5_core *core = kproc->core; + struct k3_r5_core *core = kproc->core, *core0, *core1; struct device *dev = kproc->dev; u32 ctrl = 0, cfg = 0, stat = 0; u64 boot_vec = 0; bool mem_init_dis; int ret; + /* + * R5 cores require to be powered on sequentially, core0 should be in + * higher power state than core1 in a cluster. So, wait for core0 to + * power up before proceeding to core1 and put timeout of 2sec. This + * waiting mechanism is necessary because rproc_auto_boot_callback() for + * core1 can be called before core0 due to thread execution order. + * + * By placing the wait mechanism here in .prepare() ops, this condition + * is enforced for rproc boot requests from sysfs as well. + */ + core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem); + core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem); + if (cluster->mode == CLUSTER_MODE_SPLIT && core == core1 && + !core0->released_from_reset) { + ret = wait_event_interruptible_timeout(cluster->core_transition, + core0->released_from_reset, + msecs_to_jiffies(2000)); + if (ret <= 0) { + dev_err(dev, "can not power up core1 before core0"); + return -EPERM; + } + } + ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, &stat); if (ret < 0) return ret; @@ -470,6 +485,14 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) return ret; } + /* + * Notify all threads in the wait queue when core0 state has changed so + * that threads waiting for this condition can be executed. + */ + core->released_from_reset = true; + if (core == core0) + wake_up_interruptible(&cluster->core_transition); + /* * Newer IP revisions like on J7200 SoCs support h/w auto-initialization * of TCMs, so there is no need to perform the s/w memzero. 
This bit is @@ -515,10 +538,30 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct k3_r5_core *core = kproc->core; + struct k3_r5_core *core = kproc->core, *core0, *core1; struct device *dev = kproc->dev; int ret; + /* + * Ensure power-down of cores is sequential in split mode. Core1 must + * power down before Core0 to maintain the expected state. By placing + * the wait mechanism here in .unprepare() ops, this condition is + * enforced for rproc stop or shutdown requests from sysfs and device + * removal as well. + */ + core0 = list_first_entry(&cluster->cores, struct k3_r5_core, elem); + core1 = list_last_entry(&cluster->cores, struct k3_r5_core, elem); + if (cluster->mode == CLUSTER_MODE_SPLIT && core == core0 && + core1->released_from_reset) { + ret = wait_event_interruptible_timeout(cluster->core_transition, + !core1->released_from_reset, + msecs_to_jiffies(2000)); + if (ret <= 0) { + dev_err(dev, "can not power down core0 before core1"); + return -EPERM; + } + } + /* Re-use LockStep-mode reset logic for Single-CPU mode */ ret = (cluster->mode == CLUSTER_MODE_LOCKSTEP || cluster->mode == CLUSTER_MODE_SINGLECPU) ? @@ -526,6 +569,14 @@ static int k3_r5_rproc_unprepare(struct rproc *rproc) if (ret) dev_err(dev, "unable to disable cores, ret = %d\n", ret); + /* + * Notify all threads in the wait queue when core1 state has changed so + * that threads waiting for this condition can be executed. 
+ */ + core->released_from_reset = false; + if (core == core1) + wake_up_interruptible(&cluster->core_transition); + return ret; } @@ -551,7 +602,7 @@ static int k3_r5_rproc_start(struct rproc *rproc) struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; struct device *dev = kproc->dev; - struct k3_r5_core *core0, *core; + struct k3_r5_core *core; u32 boot_addr; int ret; @@ -573,21 +624,9 @@ static int k3_r5_rproc_start(struct rproc *rproc) goto unroll_core_run; } } else { - /* do not allow core 1 to start before core 0 */ - core0 = list_first_entry(&cluster->cores, struct k3_r5_core, - elem); - if (core != core0 && core0->rproc->state == RPROC_OFFLINE) { - dev_err(dev, "%s: can not start core 1 before core 0\n", - __func__); - return -EPERM; - } - ret = k3_r5_core_run(core); if (ret) return ret; - - core->released_from_reset = true; - wake_up_interruptible(&cluster->core_transition); } return 0; @@ -628,8 +667,7 @@ static int k3_r5_rproc_stop(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct device *dev = kproc->dev; - struct k3_r5_core *core1, *core = kproc->core; + struct k3_r5_core *core = kproc->core; int ret; /* halt all applicable cores */ @@ -642,16 +680,6 @@ static int k3_r5_rproc_stop(struct rproc *rproc) } } } else { - /* do not allow core 0 to stop before core 1 */ - core1 = list_last_entry(&cluster->cores, struct k3_r5_core, - elem); - if (core != core1 && core1->rproc->state != RPROC_OFFLINE) { - dev_err(dev, "%s: can not stop core 0 before core 1\n", - __func__); - ret = -EPERM; - goto out; - } - ret = k3_r5_core_halt(core); if (ret) goto out; @@ -1279,26 +1307,6 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev) cluster->mode == CLUSTER_MODE_SINGLECPU || cluster->mode == CLUSTER_MODE_SINGLECORE) break; - - /* - * R5 cores require to be powered on sequentially, core0 - * should be in higher power state than core1 in a cluster - 
* So, wait for current core to power up before proceeding - * to next core and put timeout of 2sec for each core. - * - * This waiting mechanism is necessary because - * rproc_auto_boot_callback() for core1 can be called before - * core0 due to thread execution order. - */ - ret = wait_event_interruptible_timeout(cluster->core_transition, - core->released_from_reset, - msecs_to_jiffies(2000)); - if (ret <= 0) { - dev_err(dev, - "Timed out waiting for %s core to power up!\n", - rproc->name); - goto out; - } } return 0; diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 40d386809d6b78..bb161def317533 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -746,7 +746,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data, __le32 hdr[5] = { cpu_to_le32(len), }; int tlen = sizeof(hdr) + len; unsigned long flags; - int ret; + int ret = 0; /* Word aligned channels only accept word size aligned data */ if (channel->info_word && len % 4) diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index b88cd4fb295bce..b1a2be1f9e3b93 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -326,7 +326,7 @@ static void rtc_device_get_offset(struct rtc_device *rtc) * * Otherwise the offset seconds should be 0. */ - if (rtc->start_secs > rtc->range_max || + if ((rtc->start_secs >= 0 && rtc->start_secs > rtc->range_max) || rtc->start_secs + range_secs - 1 < rtc->range_min) rtc->offset_secs = rtc->start_secs - rtc->range_min; else if (rtc->start_secs > rtc->range_min) diff --git a/drivers/rtc/lib.c b/drivers/rtc/lib.c index fe361652727a3f..13b5b1f2046510 100644 --- a/drivers/rtc/lib.c +++ b/drivers/rtc/lib.c @@ -46,24 +46,38 @@ EXPORT_SYMBOL(rtc_year_days); * rtc_time64_to_tm - converts time64_t to rtc_time. * * @time: The number of seconds since 01-01-1970 00:00:00. - * (Must be positive.) + * Works for values since at least 1900 * @tm: Pointer to the struct rtc_time. 
*/ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { - unsigned int secs; - int days; + int days, secs; u64 u64tmp; u32 u32tmp, udays, century, day_of_century, year_of_century, year, day_of_year, month, day; bool is_Jan_or_Feb, is_leap_year; - /* time must be positive */ + /* + * Get days and seconds while preserving the sign to + * handle negative time values (dates before 1970-01-01) + */ days = div_s64_rem(time, 86400, &secs); + /* + * We need 0 <= secs < 86400 which isn't given for negative + * values of time. Fixup accordingly. + */ + if (secs < 0) { + days -= 1; + secs += 86400; + } + /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; + /* Ensure tm_wday is always positive */ + if (tm->tm_wday < 0) + tm->tm_wday += 7; /* * The following algorithm is, basically, Proposition 6.3 of Neri @@ -93,7 +107,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) * thus, is slightly different from [1]. */ - udays = ((u32) days) + 719468; + udays = days + 719468; u32tmp = 4 * udays + 3; century = u32tmp / 146097; diff --git a/drivers/rtc/rtc-loongson.c b/drivers/rtc/rtc-loongson.c index 97e5625c064ceb..2ca7ffd5d7a92a 100644 --- a/drivers/rtc/rtc-loongson.c +++ b/drivers/rtc/rtc-loongson.c @@ -129,6 +129,14 @@ static u32 loongson_rtc_handler(void *id) { struct loongson_rtc_priv *priv = (struct loongson_rtc_priv *)id; + rtc_update_irq(priv->rtcdev, 1, RTC_AF | RTC_IRQF); + + /* + * The TOY_MATCH0_REG should be cleared 0 here, + * otherwise the interrupt cannot be cleared. 
+ */ + regmap_write(priv->regmap, TOY_MATCH0_REG, 0); + spin_lock(&priv->lock); /* Disable RTC alarm wakeup and interrupt */ writel(readl(priv->pm_base + PM1_EN_REG) & ~RTC_EN, diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 9ea40f40188f3e..3409f576422485 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -485,9 +485,15 @@ static int __init sh_rtc_probe(struct platform_device *pdev) return -ENOENT; } - rtc->periodic_irq = ret; - rtc->carry_irq = platform_get_irq(pdev, 1); - rtc->alarm_irq = platform_get_irq(pdev, 2); + if (!pdev->dev.of_node) { + rtc->periodic_irq = ret; + rtc->carry_irq = platform_get_irq(pdev, 1); + rtc->alarm_irq = platform_get_irq(pdev, 2); + } else { + rtc->alarm_irq = ret; + rtc->periodic_irq = platform_get_irq(pdev, 1); + rtc->carry_irq = platform_get_irq(pdev, 2); + } res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (!res) diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 4bfe469c04aaba..8c1c908d2c6e72 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -5,7 +5,7 @@ comment "S/390 block device drivers" config DCSSBLK def_tristate m prompt "DCSSBLK support" - depends on S390 && BLOCK + depends on S390 && BLOCK && (DAX || DAX=n) help Support for dcss block device @@ -14,7 +14,6 @@ config DCSSBLK_DAX depends on DCSSBLK # requires S390 ZONE_DEVICE support depends on BROKEN - select DAX prompt "DCSSBLK DAX support" help Enable DAX operation for the dcss block device diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 21fa7ac849e5c3..4904b831c0a75f 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index) static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, u64 *first, void **airq_info) { - int i, j; + int i, j, queue_idx, highest_queue_idx = -1; struct airq_info *info; unsigned long *indicator_addr = NULL; 
unsigned long bit, flags; + /* Array entries without an actual queue pointer must be ignored. */ + for (i = 0; i < nvqs; i++) { + if (vqs[i]) + highest_queue_idx++; + } + for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) { mutex_lock(&airq_areas_lock); if (!airq_areas[i]) @@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, if (!info) return NULL; write_lock_irqsave(&info->lock, flags); - bit = airq_iv_alloc(info->aiv, nvqs); + bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1); if (bit == -1UL) { /* Not enough vacancies. */ write_unlock_irqrestore(&info->lock, flags); @@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs, *first = bit; *airq_info = info; indicator_addr = info->aiv->vector; - for (j = 0; j < nvqs; j++) { - airq_iv_set_ptr(info->aiv, bit + j, + for (j = 0, queue_idx = 0; j < nvqs; j++) { + if (!vqs[j]) + continue; + airq_iv_set_ptr(info->aiv, bit + queue_idx++, (unsigned long)vqs[j]); } write_unlock_irqrestore(&info->lock, flags); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5a3c670aec27d6..831ffcf7c7308a 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1521,4 +1521,6 @@ endif # SCSI_LOWLEVEL source "drivers/scsi/device_handler/Kconfig" +source "drivers/scsi/vhba/Kconfig" + endmenu diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 16de3e41f94c40..4e88f6e3e67bac 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -152,6 +152,7 @@ obj-$(CONFIG_CHR_DEV_SCH) += ch.o obj-$(CONFIG_SCSI_ENCLOSURE) += ses.o obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/ +obj-$(CONFIG_VHBA) += vhba/ # This goes last, so that "real" scsi devices probe earlier obj-$(CONFIG_SCSI_DEBUG) += scsi_debug.o diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 5cb1d3db4907a0..d3981b6779316b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -935,8 
+935,28 @@ static void hisi_sas_phyup_work_common(struct work_struct *work, container_of(work, typeof(*phy), works[event]); struct hisi_hba *hisi_hba = phy->hisi_hba; struct asd_sas_phy *sas_phy = &phy->sas_phy; + struct asd_sas_port *sas_port = sas_phy->port; + struct hisi_sas_port *port = phy->port; + struct device *dev = hisi_hba->dev; + struct domain_device *port_dev; int phy_no = sas_phy->id; + if (!test_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags) && + sas_port && port && (port->id != phy->port_id)) { + dev_info(dev, "phy%d's hw port id changed from %d to %llu\n", + phy_no, port->id, phy->port_id); + port_dev = sas_port->port_dev; + if (port_dev && !dev_is_expander(port_dev->dev_type)) { + /* + * Set the device state to gone to block + * sending IO to the device. + */ + set_bit(SAS_DEV_GONE, &port_dev->state); + hisi_sas_notify_phy_event(phy, HISI_PHYE_LINK_RESET); + return; + } + } + phy->wait_phyup_cnt = 0; if (phy->identify.target_port_protocols == SAS_PROTOCOL_SSP) hisi_hba->hw->sl_notify_ssp(hisi_hba, phy_no); @@ -1865,33 +1885,14 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) } hisi_sas_dereg_device(hisi_hba, device); - rc = hisi_sas_debug_I_T_nexus_reset(device); - if (rc == TMF_RESP_FUNC_COMPLETE && dev_is_sata(device)) { - struct sas_phy *local_phy; - + if (dev_is_sata(device)) { rc = hisi_sas_softreset_ata_disk(device); - switch (rc) { - case -ECOMM: - rc = -ENODEV; - break; - case TMF_RESP_FUNC_FAILED: - case -EMSGSIZE: - case -EIO: - local_phy = sas_get_local_phy(device); - rc = sas_phy_enable(local_phy, 0); - if (!rc) { - local_phy->enabled = 0; - dev_err(dev, "Disabled local phy of ATA disk %016llx due to softreset fail (%d)\n", - SAS_ADDR(device->sas_addr), rc); - rc = -ENODEV; - } - sas_put_local_phy(local_phy); - break; - default: - break; - } + if (rc == TMF_RESP_FUNC_FAILED) + dev_err(dev, "ata disk %016llx reset (%d)\n", + SAS_ADDR(device->sas_addr), rc); } + rc = hisi_sas_debug_I_T_nexus_reset(device); if ((rc == 
TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV)) hisi_sas_release_task(hisi_hba, device); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index a1fc400ab4c3c7..1e9830940f84e9 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2501,6 +2501,7 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_port *port = to_hisi_sas_port(sas_port); struct sas_ata_task *ata_task = &task->ata_task; struct sas_tmf_task *tmf = slot->tmf; + int phy_id; u8 *buf_cmd; int has_data = 0, hdr_tag = 0; u32 dw0, dw1 = 0, dw2 = 0; @@ -2508,10 +2509,14 @@ static void prep_ata_v2_hw(struct hisi_hba *hisi_hba, /* create header */ /* dw0 */ dw0 = port->id << CMD_HDR_PORT_OFF; - if (parent_dev && dev_is_expander(parent_dev->dev_type)) + if (parent_dev && dev_is_expander(parent_dev->dev_type)) { dw0 |= 3 << CMD_HDR_CMD_OFF; - else + } else { + phy_id = device->phy->identify.phy_identifier; + dw0 |= (1U << phy_id) << CMD_HDR_PHY_ID_OFF; + dw0 |= CMD_HDR_FORCE_PHY_MSK; dw0 |= 4 << CMD_HDR_CMD_OFF; + } if (tmf && ata_task->force_phy) { dw0 |= CMD_HDR_FORCE_PHY_MSK; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 2684d64820671d..08dac9ae2f109a 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -359,6 +359,10 @@ #define CMD_HDR_RESP_REPORT_MSK (0x1 << CMD_HDR_RESP_REPORT_OFF) #define CMD_HDR_TLR_CTRL_OFF 6 #define CMD_HDR_TLR_CTRL_MSK (0x3 << CMD_HDR_TLR_CTRL_OFF) +#define CMD_HDR_PHY_ID_OFF 8 +#define CMD_HDR_PHY_ID_MSK (0x1ff << CMD_HDR_PHY_ID_OFF) +#define CMD_HDR_FORCE_PHY_OFF 17 +#define CMD_HDR_FORCE_PHY_MSK (0x1U << CMD_HDR_FORCE_PHY_OFF) #define CMD_HDR_PORT_OFF 18 #define CMD_HDR_PORT_MSK (0xf << CMD_HDR_PORT_OFF) #define CMD_HDR_PRIORITY_OFF 27 @@ -1429,15 +1433,21 @@ static void prep_ata_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr; struct asd_sas_port 
*sas_port = device->port; struct hisi_sas_port *port = to_hisi_sas_port(sas_port); + int phy_id; u8 *buf_cmd; int has_data = 0, hdr_tag = 0; u32 dw1 = 0, dw2 = 0; hdr->dw0 = cpu_to_le32(port->id << CMD_HDR_PORT_OFF); - if (parent_dev && dev_is_expander(parent_dev->dev_type)) + if (parent_dev && dev_is_expander(parent_dev->dev_type)) { hdr->dw0 |= cpu_to_le32(3 << CMD_HDR_CMD_OFF); - else + } else { + phy_id = device->phy->identify.phy_identifier; + hdr->dw0 |= cpu_to_le32((1U << phy_id) + << CMD_HDR_PHY_ID_OFF); + hdr->dw0 |= CMD_HDR_FORCE_PHY_MSK; hdr->dw0 |= cpu_to_le32(4U << CMD_HDR_CMD_OFF); + } switch (task->data_dir) { case DMA_TO_DEVICE: diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 179be6c5a43e07..c2ec4db6728697 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -161,7 +161,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) struct lpfc_hba *phba; struct lpfc_work_evt *evtp; unsigned long iflags; - bool nvme_reg = false; + bool drop_initial_node_ref = false; ndlp = ((struct lpfc_rport_data *)rport->dd_data)->pnode; if (!ndlp) @@ -188,8 +188,13 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) spin_lock_irqsave(&ndlp->lock, iflags); ndlp->rport = NULL; - if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) - nvme_reg = true; + /* Only 1 thread can drop the initial node reference. + * If not registered for NVME and NLP_DROPPED flag is + * clear, remove the initial reference. + */ + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) + if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag)) + drop_initial_node_ref = true; /* The scsi_transport is done with the rport so lpfc cannot * call to unregister. @@ -200,13 +205,16 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) /* If NLP_XPT_REGD was cleared in lpfc_nlp_unreg_node, * unregister calls were made to the scsi and nvme * transports and refcnt was already decremented. 
Clear - * the NLP_XPT_REGD flag only if the NVME Rport is + * the NLP_XPT_REGD flag only if the NVME nrport is * confirmed unregistered. */ - if (!nvme_reg && ndlp->fc4_xpt_flags & NLP_XPT_REGD) { - ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; + if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) { + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) + ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD; spin_unlock_irqrestore(&ndlp->lock, iflags); - lpfc_nlp_put(ndlp); /* may free ndlp */ + + /* Release scsi transport reference */ + lpfc_nlp_put(ndlp); } else { spin_unlock_irqrestore(&ndlp->lock, iflags); } @@ -214,14 +222,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) spin_unlock_irqrestore(&ndlp->lock, iflags); } - /* Only 1 thread can drop the initial node reference. If - * another thread has set NLP_DROPPED, this thread is done. - */ - if (nvme_reg || test_bit(NLP_DROPPED, &ndlp->nlp_flag)) - return; - - set_bit(NLP_DROPPED, &ndlp->nlp_flag); - lpfc_nlp_put(ndlp); + if (drop_initial_node_ref) + lpfc_nlp_put(ndlp); return; } diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 088cc40ae866a0..8ee2bfe475715a 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -23,8 +23,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.727.03.00-rc1" -#define MEGASAS_RELDATE "Oct 03, 2023" +#define MEGASAS_VERSION "07.734.00.00-rc1" +#define MEGASAS_RELDATE "Apr 03, 2025" #define MEGASAS_MSIX_NAME_LEN 32 diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index c20447b39cb906..5e33d411fa3d8d 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2103,6 +2103,9 @@ static int megasas_sdev_configure(struct scsi_device *sdev, /* This sdev property may change post OCR */ megasas_set_dynamic_target_properties(sdev, lim, is_target_prop); + if (!MEGASAS_IS_LOGICAL(sdev)) + sdev->no_vpd_size = 1; + 
mutex_unlock(&instance->reset_mutex); return 0; @@ -3662,8 +3665,10 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd, case MFI_STAT_SCSI_IO_FAILED: case MFI_STAT_LD_INIT_IN_PROGRESS: - cmd->scmd->result = - (DID_ERROR << 16) | hdr->scsi_status; + if (hdr->scsi_status == 0xf0) + cmd->scmd->result = (DID_ERROR << 16) | SAM_STAT_CHECK_CONDITION; + else + cmd->scmd->result = (DID_ERROR << 16) | hdr->scsi_status; break; case MFI_STAT_SCSI_DONE_WITH_ERROR: diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 721860cb1ef6ee..a6794f49e9faef 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2043,7 +2043,10 @@ map_cmd_status(struct fusion_context *fusion, case MFI_STAT_SCSI_IO_FAILED: case MFI_STAT_LD_INIT_IN_PROGRESS: - scmd->result = (DID_ERROR << 16) | ext_status; + if (ext_status == 0xf0) + scmd->result = (DID_ERROR << 16) | SAM_STAT_CHECK_CONDITION; + else + scmd->result = (DID_ERROR << 16) | ext_status; break; case MFI_STAT_SCSI_DONE_WITH_ERROR: diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 3fcb1ad3b070d5..1d7901a8f0e406 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -174,6 +174,9 @@ static void mpi3mr_print_event_data(struct mpi3mr_ioc *mrioc, char *desc = NULL; u16 event; + if (!(mrioc->logging_level & MPI3_DEBUG_EVENT)) + return; + event = event_reply->event; switch (event) { @@ -451,6 +454,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) return 0; } + atomic_set(&mrioc->admin_pend_isr, 0); reply_desc = (struct mpi3_default_reply_descriptor *)mrioc->admin_reply_base + admin_reply_ci; @@ -565,7 +569,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc, WRITE_ONCE(op_req_q->ci, le16_to_cpu(reply_desc->request_queue_ci)); mpi3mr_process_op_reply_desc(mrioc, reply_desc, &reply_dma, reply_qidx); - 
atomic_dec(&op_reply_q->pend_ios); + if (reply_dma) mpi3mr_repost_reply_buf(mrioc, reply_dma); num_op_reply++; @@ -2925,6 +2929,7 @@ static int mpi3mr_setup_admin_qpair(struct mpi3mr_ioc *mrioc) mrioc->admin_reply_ci = 0; mrioc->admin_reply_ephase = 1; atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (!mrioc->admin_req_base) { mrioc->admin_req_base = dma_alloc_coherent(&mrioc->pdev->dev, @@ -4653,6 +4658,7 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) if (mrioc->admin_reply_base) memset(mrioc->admin_reply_base, 0, mrioc->admin_reply_q_sz); atomic_set(&mrioc->admin_reply_q_in_use, 0); + atomic_set(&mrioc->admin_pend_isr, 0); if (mrioc->init_cmds.reply) { memset(mrioc->init_cmds.reply, 0, sizeof(*mrioc->init_cmds.reply)); diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 063b10dd82514e..02fc204b9bf7b2 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -2869,8 +2869,9 @@ _ctl_get_mpt_mctp_passthru_adapter(int dev_index) if (ioc->facts.IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU) { if (count == dev_index) { spin_unlock(&gioc_lock); - return 0; + return ioc; } + count++; } } spin_unlock(&gioc_lock); diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index dc4bd422b60191..486db5b2f05d2d 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -891,7 +891,7 @@ static bool myrb_enable_mmio(struct myrb_hba *cb, mbox_mmio_init_t mmio_init_fn) status = mmio_init_fn(pdev, base, &mbox); if (status != MYRB_STATUS_SUCCESS) { dev_err(&pdev->dev, - "Failed to enable mailbox, statux %02X\n", + "Failed to enable mailbox, status %02X\n", status); return false; } diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 183ce00aa671ea..f7067878b34f3b 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -766,6 +766,7 @@ static void pm8001_dev_gone_notify(struct 
domain_device *dev) spin_lock_irqsave(&pm8001_ha->lock, flags); } PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id); + pm8001_ha->phy[pm8001_dev->attached_phy].phy_attached = 0; pm8001_free_dev(pm8001_dev); } else { pm8001_dbg(pm8001_ha, DISC, "Found dev has gone.\n"); diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 436bd29d5ebae6..6b1ebab36fa35b 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -699,7 +699,7 @@ static u32 qedf_get_login_failures(void *cookie) } static struct qed_fcoe_cb_ops qedf_cb_ops = { - { + .common = { .link_update = qedf_link_update, .bw_update = qedf_bw_update, .schedule_recovery_handler = qedf_schedule_recovery_handler, diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 53daf923ad8ef3..518a252eb6aa05 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -707,26 +707,23 @@ void scsi_cdl_check(struct scsi_device *sdev) */ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) { - struct scsi_mode_data data; - struct scsi_sense_hdr sshdr; - struct scsi_vpd *vpd; - bool is_ata = false; char buf[64]; + bool is_ata; int ret; if (!sdev->cdl_supported) return -EOPNOTSUPP; rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (vpd) - is_ata = true; + is_ata = rcu_dereference(sdev->vpd_pg89); rcu_read_unlock(); /* * For ATA devices, CDL needs to be enabled with a SET FEATURES command. 
*/ if (is_ata) { + struct scsi_mode_data data; + struct scsi_sense_hdr sshdr; char *buf_data; int len; @@ -735,16 +732,30 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) if (ret) return -EINVAL; - /* Enable CDL using the ATA feature page */ + /* Enable or disable CDL using the ATA feature page */ len = min_t(size_t, sizeof(buf), data.length - data.header_length - data.block_descriptor_length); buf_data = buf + data.header_length + data.block_descriptor_length; - if (enable) - buf_data[4] = 0x02; - else - buf_data[4] = 0; + + /* + * If we want to enable CDL and CDL is already enabled on the + * device, do nothing. This avoids needlessly resetting the CDL + * statistics on the device as that is implied by the CDL enable + * action. Similar to this, there is no need to do anything if + * we want to disable CDL and CDL is already disabled. + */ + if (enable) { + if ((buf_data[4] & 0x03) == 0x02) + goto out; + buf_data[4] &= ~0x03; + buf_data[4] |= 0x02; + } else { + if ((buf_data[4] & 0x03) == 0x00) + goto out; + buf_data[4] &= ~0x03; + } ret = scsi_mode_select(sdev, 1, 0, buf_data, len, 5 * HZ, 3, &data, &sshdr); @@ -756,6 +767,7 @@ int scsi_cdl_enable(struct scsi_device *sdev, bool enable) } } +out: sdev->cdl_enable = enable; return 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0d29470e86b0b8..1b43013d72c03b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1253,8 +1253,12 @@ EXPORT_SYMBOL_GPL(scsi_alloc_request); */ static void scsi_cleanup_rq(struct request *rq) { + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); + + cmd->flags = 0; + if (rq->rq_flags & RQF_DONTPREP) { - scsi_mq_uninit_cmd(blk_mq_rq_to_pdu(rq)); + scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } } diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 9c347c64c315f8..c75a806496d674 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3182,11 +3182,14 @@ 
iscsi_set_host_param(struct iscsi_transport *transport, } /* see similar check in iscsi_if_set_param() */ - if (strlen(data) > ev->u.set_host_param.len) - return -EINVAL; + if (strlen(data) > ev->u.set_host_param.len) { + err = -EINVAL; + goto out; + } err = transport->set_host_param(shost, ev->u.set_host_param.param, data, ev->u.set_host_param.len); +out: scsi_host_put(shost); return err; } @@ -3496,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); @@ -3506,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, else err = -EIO; -put_host: scsi_host_put(shost); exit_new_fnode: @@ -3531,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; @@ -3573,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; @@ -3625,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; @@ -3675,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); diff --git a/drivers/scsi/scsi_transport_srp.c 
b/drivers/scsi/scsi_transport_srp.c index 64f6b22e8cc0c9..aeb58a9e6b7f1d 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -388,7 +388,7 @@ static void srp_reconnect_work(struct work_struct *work) "reconnect attempt %d failed (%d)\n", ++rport->failed_reconnects, res); delay = rport->reconnect_delay * - min(100, max(1, rport->failed_reconnects - 10)); + clamp(rport->failed_reconnects - 10, 1, 100); if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, delay * HZ); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 7a447ff600d276..a8db66428f80d4 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -169,6 +169,7 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, unsigned int nr_zones, size_t *buflen) { struct request_queue *q = sdkp->disk->queue; + unsigned int max_segments; size_t bufsize; void *buf; @@ -180,12 +181,15 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, * Furthermore, since the report zone command cannot be split, make * sure that the allocated buffer can always be mapped by limiting the * number of pages allocated to the HBA max segments limit. + * Since max segments can be larger than the max inline bio vectors, + * further limit the allocated buffer to BIO_MAX_INLINE_VECS. 
*/ nr_zones = min(nr_zones, sdkp->zone_info.nr_zones); bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); bufsize = min_t(size_t, bufsize, queue_max_hw_sectors(q) << SECTOR_SHIFT); - bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); + max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q)); + bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT); while (bufsize >= SECTOR_SIZE) { buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY); diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 88135fdb8bd14b..6c9dec7e3128fd 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -5246,7 +5247,7 @@ static void pqi_calculate_io_resources(struct pqi_ctrl_info *ctrl_info) ctrl_info->error_buffer_length = ctrl_info->max_io_slots * PQI_ERROR_BUFFER_ELEMENT_LENGTH; - if (reset_devices) + if (is_kdump_kernel()) max_transfer_size = min(ctrl_info->max_transfer_size, PQI_MAX_TRANSFER_SIZE_KDUMP); else @@ -5275,7 +5276,7 @@ static void pqi_calculate_queue_resources(struct pqi_ctrl_info *ctrl_info) u16 num_elements_per_iq; u16 num_elements_per_oq; - if (reset_devices) { + if (is_kdump_kernel()) { num_queue_groups = 1; } else { int num_cpus; @@ -5989,7 +5990,7 @@ static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info, pqi_stream_data->next_lba = rmd.first_block + rmd.block_cnt; pqi_stream_data->last_accessed = jiffies; - per_cpu_ptr(device->raid_io_stats, smp_processor_id())->write_stream_cnt++; + per_cpu_ptr(device->raid_io_stats, raw_smp_processor_id())->write_stream_cnt++; return true; } @@ -6068,7 +6069,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device, scmd, queue_group); if (rc == 0 || rc == SCSI_MLQUEUE_HOST_BUSY) { raid_bypassed = true; - per_cpu_ptr(device->raid_io_stats, 
smp_processor_id())->raid_bypass_cnt++; + per_cpu_ptr(device->raid_io_stats, raw_smp_processor_id())->raid_bypass_cnt++; } } if (!raid_bypassed) @@ -8288,12 +8289,12 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) u32 product_id; if (reset_devices) { - if (pqi_is_fw_triage_supported(ctrl_info)) { + if (is_kdump_kernel() && pqi_is_fw_triage_supported(ctrl_info)) { rc = sis_wait_for_fw_triage_completion(ctrl_info); if (rc) return rc; } - if (sis_is_ctrl_logging_supported(ctrl_info)) { + if (is_kdump_kernel() && sis_is_ctrl_logging_supported(ctrl_info)) { sis_notify_kdump(ctrl_info); rc = sis_wait_for_ctrl_logging_completion(ctrl_info); if (rc) @@ -8344,7 +8345,7 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) ctrl_info->product_id = (u8)product_id; ctrl_info->product_revision = (u8)(product_id >> 8); - if (reset_devices) { + if (is_kdump_kernel()) { if (ctrl_info->max_outstanding_requests > PQI_MAX_OUTSTANDING_REQUESTS_KDUMP) ctrl_info->max_outstanding_requests = @@ -8480,7 +8481,7 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info) if (rc) return rc; - if (ctrl_info->ctrl_logging_supported && !reset_devices) { + if (ctrl_info->ctrl_logging_supported && !is_kdump_kernel()) { pqi_host_setup_buffer(ctrl_info, &ctrl_info->ctrl_log_memory, PQI_CTRL_LOG_TOTAL_SIZE, PQI_CTRL_LOG_MIN_SIZE); pqi_host_memory_update(ctrl_info, &ctrl_info->ctrl_log_memory, PQI_VENDOR_GENERAL_CTRL_LOG_MEMORY_UPDATE); } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 35db061ae3ecde..2e6b2412d2c946 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1819,6 +1819,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) return SCSI_MLQUEUE_DEVICE_BUSY; } + payload->rangecount = 1; payload->range.len = length; payload->range.offset = offset_in_hvpg; diff --git a/drivers/scsi/vhba/Kconfig b/drivers/scsi/vhba/Kconfig new file mode 100644 index 00000000000000..e70a381fe3dff8 --- /dev/null 
+++ b/drivers/scsi/vhba/Kconfig @@ -0,0 +1,9 @@ +config VHBA + tristate "Virtual (SCSI) Host Bus Adapter" + depends on SCSI + help + This is the in-kernel part of CDEmu, a CD/DVD-ROM device + emulator. + + This driver can also be built as a module. If so, the module + will be called vhba. diff --git a/drivers/scsi/vhba/Makefile b/drivers/scsi/vhba/Makefile new file mode 100644 index 00000000000000..2d7524b661990a --- /dev/null +++ b/drivers/scsi/vhba/Makefile @@ -0,0 +1,4 @@ +VHBA_VERSION := 20240917 + +obj-$(CONFIG_VHBA) += vhba.o +ccflags-y := -DVHBA_VERSION=\"$(VHBA_VERSION)\" -Werror diff --git a/drivers/scsi/vhba/vhba.c b/drivers/scsi/vhba/vhba.c new file mode 100644 index 00000000000000..878a3be0ba2b2c --- /dev/null +++ b/drivers/scsi/vhba/vhba.c @@ -0,0 +1,1132 @@ +/* + * vhba.c + * + * Copyright (C) 2007-2012 Chia-I Wu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define pr_fmt(fmt) "vhba: " fmt + +#include + +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#ifdef CONFIG_COMPAT +#include +#endif +#include +#include +#include +#include +#include +#include + + +MODULE_AUTHOR("Chia-I Wu"); +MODULE_VERSION(VHBA_VERSION); +MODULE_DESCRIPTION("Virtual SCSI HBA"); +MODULE_LICENSE("GPL"); + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) +#define sdev_dbg(sdev, fmt, a...) \ + dev_dbg(&(sdev)->sdev_gendev, fmt, ##a) +#define scmd_dbg(scmd, fmt, a...) \ + dev_dbg(&(scmd)->device->sdev_gendev, fmt, ##a) +#endif + +#define VHBA_MAX_SECTORS_PER_IO 256 +#define VHBA_MAX_BUS 16 +#define VHBA_MAX_ID 16 +#define VHBA_MAX_DEVICES (VHBA_MAX_BUS * (VHBA_MAX_ID-1)) +#define VHBA_KBUF_SIZE PAGE_SIZE + +#define DATA_TO_DEVICE(dir) ((dir) == DMA_TO_DEVICE || (dir) == DMA_BIDIRECTIONAL) +#define DATA_FROM_DEVICE(dir) ((dir) == DMA_FROM_DEVICE || (dir) == DMA_BIDIRECTIONAL) + + +static int vhba_can_queue = 32; +module_param_named(can_queue, vhba_can_queue, int, 0); + + +enum vhba_req_state { + VHBA_REQ_FREE, + VHBA_REQ_PENDING, + VHBA_REQ_READING, + VHBA_REQ_SENT, + VHBA_REQ_WRITING, +}; + +struct vhba_command { + struct scsi_cmnd *cmd; + /* metatags are per-host. 
not to be confused with + queue tags that are usually per-lun */ + unsigned long metatag; + int status; + struct list_head entry; +}; + +struct vhba_device { + unsigned int num; + spinlock_t cmd_lock; + struct list_head cmd_list; + wait_queue_head_t cmd_wq; + atomic_t refcnt; + + unsigned char *kbuf; + size_t kbuf_size; +}; + +struct vhba_host { + struct Scsi_Host *shost; + spinlock_t cmd_lock; + int cmd_next; + struct vhba_command *commands; + spinlock_t dev_lock; + struct vhba_device *devices[VHBA_MAX_DEVICES]; + int num_devices; + DECLARE_BITMAP(chgmap, VHBA_MAX_DEVICES); + int chgtype[VHBA_MAX_DEVICES]; + struct work_struct scan_devices; +}; + +#define MAX_COMMAND_SIZE 16 + +struct vhba_request { + __u32 metatag; + __u32 lun; + __u8 cdb[MAX_COMMAND_SIZE]; + __u8 cdb_len; + __u32 data_len; +}; + +struct vhba_response { + __u32 metatag; + __u32 status; + __u32 data_len; +}; + + + +static struct vhba_command *vhba_alloc_command (void); +static void vhba_free_command (struct vhba_command *vcmd); + +static struct platform_device vhba_platform_device; + + + +/* These functions define a symmetric 1:1 mapping between device numbers and + the bus and id. We have reserved the last id per bus for the host itself. 
*/ +static void devnum_to_bus_and_id(unsigned int devnum, unsigned int *bus, unsigned int *id) +{ + *bus = devnum / (VHBA_MAX_ID-1); + *id = devnum % (VHBA_MAX_ID-1); +} + +static unsigned int bus_and_id_to_devnum(unsigned int bus, unsigned int id) +{ + return (bus * (VHBA_MAX_ID-1)) + id; +} + +static struct vhba_device *vhba_device_alloc (void) +{ + struct vhba_device *vdev; + + vdev = kzalloc(sizeof(struct vhba_device), GFP_KERNEL); + if (!vdev) { + return NULL; + } + + spin_lock_init(&vdev->cmd_lock); + INIT_LIST_HEAD(&vdev->cmd_list); + init_waitqueue_head(&vdev->cmd_wq); + atomic_set(&vdev->refcnt, 1); + + vdev->kbuf = NULL; + vdev->kbuf_size = 0; + + return vdev; +} + +static void vhba_device_put (struct vhba_device *vdev) +{ + if (atomic_dec_and_test(&vdev->refcnt)) { + kfree(vdev); + } +} + +static struct vhba_device *vhba_device_get (struct vhba_device *vdev) +{ + atomic_inc(&vdev->refcnt); + + return vdev; +} + +static int vhba_device_queue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_host *vhost; + struct vhba_command *vcmd; + unsigned long flags; + + vhost = platform_get_drvdata(&vhba_platform_device); + + vcmd = vhba_alloc_command(); + if (!vcmd) { + return SCSI_MLQUEUE_HOST_BUSY; + } + + vcmd->cmd = cmd; + + spin_lock_irqsave(&vdev->cmd_lock, flags); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + vcmd->metatag = scsi_cmd_to_rq(vcmd->cmd)->tag; +#else + vcmd->metatag = vcmd->cmd->request->tag; +#endif + list_add_tail(&vcmd->entry, &vdev->cmd_list); + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + wake_up_interruptible(&vdev->cmd_wq); + + return 0; +} + +static int vhba_device_dequeue (struct vhba_device *vdev, struct scsi_cmnd *cmd) +{ + struct vhba_command *vcmd; + int retval; + unsigned long flags; + + spin_lock_irqsave(&vdev->cmd_lock, flags); + list_for_each_entry(vcmd, &vdev->cmd_list, entry) { + if (vcmd->cmd == cmd) { + list_del_init(&vcmd->entry); + break; + } + } + + /* command not found */ + if (&vcmd->entry 
== &vdev->cmd_list) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + return SUCCESS; + } + + while (vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING) { + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + scmd_dbg(cmd, "wait for I/O before aborting\n"); + schedule_timeout(1); + spin_lock_irqsave(&vdev->cmd_lock, flags); + } + + retval = (vcmd->status == VHBA_REQ_SENT) ? FAILED : SUCCESS; + + vhba_free_command(vcmd); + + spin_unlock_irqrestore(&vdev->cmd_lock, flags); + + return retval; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) +static int vhba_slave_alloc(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + + sdev_dbg(sdev, "enabling tagging (queue depth: %i).\n", sdev->queue_depth); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0) + if (!shost_use_blk_mq(shost) && shost->bqt) { +#else + if (shost->bqt) { +#endif + blk_queue_init_tags(sdev->request_queue, sdev->queue_depth, shost->bqt); + } + scsi_adjust_queue_depth(sdev, 0, sdev->queue_depth); + + return 0; +} +#endif + +static void vhba_scan_devices_add (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (!sdev) { + scsi_add_device(vhost->shost, bus, id, 0); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to add an already-existing device %d:%d:0!\n", bus, id); + scsi_device_put(sdev); + } +} + +static void vhba_scan_devices_remove (struct vhba_host *vhost, int bus, int id) +{ + struct scsi_device *sdev; + + sdev = scsi_device_lookup(vhost->shost, bus, id, 0); + if (sdev) { + scsi_remove_device(sdev); + scsi_device_put(sdev); + } else { + dev_warn(&vhost->shost->shost_gendev, "tried to remove non-existing device %d:%d:0!\n", bus, id); + } +} + +static void vhba_scan_devices (struct work_struct *work) +{ + struct vhba_host *vhost = container_of(work, struct vhba_host, scan_devices); + unsigned long flags; + int change, exists; + unsigned int devnum; + unsigned int 
bus, id;
+    /* Handle one flagged device number per iteration until the bitmap is empty. */
+    for (;;) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+
+        devnum = find_first_bit(vhost->chgmap, VHBA_MAX_DEVICES);
+        if (devnum >= VHBA_MAX_DEVICES) {
+            spin_unlock_irqrestore(&vhost->dev_lock, flags);
+            break;
+        }
+        change = vhost->chgtype[devnum];
+        exists = vhost->devices[devnum] != NULL;
+        /* Consume the pending change while still holding dev_lock. */
+        vhost->chgtype[devnum] = 0;
+        clear_bit(devnum, vhost->chgmap);
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+        devnum_to_bus_and_id(devnum, &bus, &id);
+        /* chgtype is decremented on removal and incremented on addition. */
+        if (change < 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to remove target %d:%d:0\n", bus, id);
+            vhba_scan_devices_remove(vhost, bus, id);
+        } else if (change > 0) {
+            dev_dbg(&vhost->shost->shost_gendev, "trying to add target %d:%d:0\n", bus, id);
+            vhba_scan_devices_add(vhost, bus, id);
+        } else {
+            /* quick sequence of add/remove or remove/add; we determine
+               which one it was by checking if device structure exists */
+            if (exists) {
+                /* remove followed by add: remove and (re)add */
+                dev_dbg(&vhost->shost->shost_gendev, "trying to (re)add target %d:%d:0\n", bus, id);
+                vhba_scan_devices_remove(vhost, bus, id);
+                vhba_scan_devices_add(vhost, bus, id);
+            } else {
+                /* add followed by remove: no-op */
+                dev_dbg(&vhost->shost->shost_gendev, "no-op for target %d:%d:0\n", bus, id);
+            }
+        }
+    }
+}
+/*
+ * vhba_add_device - install vdev in the first free slot of vhost->devices[].
+ *
+ * Takes a reference on vdev, records the slot index in vdev->num, flags the
+ * slot in chgmap, bumps its chgtype counter and schedules the scan work.
+ * Returns 0, or -EBUSY (after dropping the reference again) when
+ * num_devices has already reached VHBA_MAX_DEVICES.
+ */
+static int vhba_add_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    unsigned int devnum;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    vhba_device_get(vdev);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    if (vhost->num_devices >= VHBA_MAX_DEVICES) {
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+        vhba_device_put(vdev);
+        return -EBUSY;
+    }
+
+    for (devnum = 0; devnum < VHBA_MAX_DEVICES; devnum++) {
+        if (vhost->devices[devnum] == NULL) {
+            vdev->num = devnum;
+            vhost->devices[devnum] = vdev;
+            vhost->num_devices++;
+            set_bit(devnum, vhost->chgmap);
+            vhost->chgtype[devnum]++;
+            break;
+        }
+    }
+
spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+/*
+ * vhba_remove_device - take vdev out of vhost->devices[].
+ *
+ * Flags the slot vdev->num in chgmap, decrements its chgtype counter, clears
+ * the slot and the device count under dev_lock, then drops one reference on
+ * vdev and schedules the scan work.  Always returns 0.
+ */
+static int vhba_remove_device (struct vhba_device *vdev)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->dev_lock, flags);
+    set_bit(vdev->num, vhost->chgmap);
+    vhost->chgtype[vdev->num]--;
+    vhost->devices[vdev->num] = NULL;
+    vhost->num_devices--;
+    spin_unlock_irqrestore(&vhost->dev_lock, flags);
+
+    vhba_device_put(vdev);
+
+    schedule_work(&vhost->scan_devices);
+
+    return 0;
+}
+/*
+ * vhba_lookup_device - fetch the device stored in slot devnum.
+ *
+ * Returns the result of vhba_device_get() on the stored device, or NULL when
+ * the slot is empty or devnum is not below VHBA_MAX_DEVICES.  Callers in
+ * this file release the result with vhba_device_put().
+ */
+static struct vhba_device *vhba_lookup_device (int devnum)
+{
+    struct vhba_host *vhost;
+    struct vhba_device *vdev = NULL;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    if (likely(devnum < VHBA_MAX_DEVICES)) {
+        spin_lock_irqsave(&vhost->dev_lock, flags);
+        vdev = vhost->devices[devnum];
+        if (vdev) {
+            vdev = vhba_device_get(vdev);
+        }
+
+        spin_unlock_irqrestore(&vhost->dev_lock, flags);
+    }
+
+    return vdev;
+}
+/*
+ * vhba_alloc_command - claim a free slot from vhost->commands[].
+ *
+ * Tries the slot at cmd_next first; if that one is busy, scans the table
+ * from index 0.  The claimed slot is marked VHBA_REQ_PENDING.  Returns NULL
+ * when all vhba_can_queue slots are in use.
+ */
+static struct vhba_command *vhba_alloc_command (void)
+{
+    struct vhba_host *vhost;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+    int i;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+
+    vcmd = vhost->commands + vhost->cmd_next++;
+    if (vcmd->status != VHBA_REQ_FREE) {
+        for (i = 0; i < vhba_can_queue; i++) {
+            vcmd = vhost->commands + i;
+
+            if (vcmd->status == VHBA_REQ_FREE) {
+                vhost->cmd_next = i + 1;
+                break;
+            }
+        }
+
+        if (i == vhba_can_queue) {
+            vcmd = NULL;
+        }
+    }
+
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+    /* cmd_next may now equal vhba_can_queue; wrap it back into range. */
+    vhost->cmd_next %= vhba_can_queue;
+
+    spin_unlock_irqrestore(&vhost->cmd_lock, flags);
+
+    return vcmd;
+}
+/*
+ * vhba_free_command - return a command slot to the pool by marking it
+ * VHBA_REQ_FREE under vhost->cmd_lock.
+ */
+static void vhba_free_command (struct vhba_command *vcmd)
+{
+    struct vhba_host *vhost;
+    unsigned long flags;
+
+    vhost = platform_get_drvdata(&vhba_platform_device);
+
+    spin_lock_irqsave(&vhost->cmd_lock, flags);
+    vcmd->status =
VHBA_REQ_FREE; + spin_unlock_irqrestore(&vhost->cmd_lock, flags); +} + +static int vhba_queuecommand (struct Scsi_Host *shost, struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval; + unsigned int devnum; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0) + scmd_dbg(cmd, "queue %p tag %i\n", cmd, scsi_cmd_to_rq(cmd)->tag); +#else + scmd_dbg(cmd, "queue %p tag %i\n", cmd, cmd->request->tag); +#endif + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (!vdev) { + scmd_dbg(cmd, "no such device\n"); + + cmd->result = DID_NO_CONNECT << 16; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0) + scsi_done(cmd); +#else + cmd->scsi_done(cmd); +#endif + + return 0; + } + + retval = vhba_device_queue(vdev, cmd); + + vhba_device_put(vdev); + + return retval; +} + +static int vhba_abort (struct scsi_cmnd *cmd) +{ + struct vhba_device *vdev; + int retval = SUCCESS; + unsigned int devnum; + + scmd_dbg(cmd, "abort %p\n", cmd); + + devnum = bus_and_id_to_devnum(cmd->device->channel, cmd->device->id); + vdev = vhba_lookup_device(devnum); + if (vdev) { + retval = vhba_device_dequeue(vdev, cmd); + vhba_device_put(vdev); + } else { + cmd->result = DID_NO_CONNECT << 16; + } + + return retval; +} + +static struct scsi_host_template vhba_template = { + .module = THIS_MODULE, + .name = "vhba", + .proc_name = "vhba", + .queuecommand = vhba_queuecommand, + .eh_abort_handler = vhba_abort, + .this_id = -1, + .max_sectors = VHBA_MAX_SECTORS_PER_IO, + .sg_tablesize = 256, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) + .slave_alloc = vhba_slave_alloc, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(6, 14, 0) + .tag_alloc_policy = BLK_TAG_ALLOC_RR, +#else + .tag_alloc_policy_rr = true, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0) + .use_blk_tags = 1, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 
0, 0) + .max_segment_size = VHBA_KBUF_SIZE, +#endif +}; + +static ssize_t do_request (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, char __user *buf, size_t buf_len) +{ + struct vhba_request vreq; + ssize_t ret; + + scmd_dbg(cmd, "request %lu (%p), cdb 0x%x, bufflen %d, sg count %d\n", + metatag, cmd, cmd->cmnd[0], scsi_bufflen(cmd), scsi_sg_count(cmd)); + + ret = sizeof(vreq); + if (DATA_TO_DEVICE(cmd->sc_data_direction)) { + ret += scsi_bufflen(cmd); + } + + if (ret > buf_len) { + scmd_dbg(cmd, "buffer too small (%zd < %zd) for a request\n", buf_len, ret); + return -EIO; + } + + vreq.metatag = metatag; + vreq.lun = cmd->device->lun; + memcpy(vreq.cdb, cmd->cmnd, MAX_COMMAND_SIZE); + vreq.cdb_len = cmd->cmd_len; + vreq.data_len = scsi_bufflen(cmd); + + if (copy_to_user(buf, &vreq, sizeof(vreq))) { + return -EFAULT; + } + + if (DATA_TO_DEVICE(cmd->sc_data_direction) && vreq.data_len) { + buf += sizeof(vreq); + + if (scsi_sg_count(cmd)) { + unsigned char *kaddr, *uaddr; + struct scatterlist *sglist = scsi_sglist(cmd); + struct scatterlist *sg; + int i; + + uaddr = (unsigned char *) buf; + + for_each_sg(sglist, sg, scsi_sg_count(cmd), i) { + size_t len = sg->length; + + if (len > vdev->kbuf_size) { + scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size); + len = vdev->kbuf_size; + } + + kaddr = kmap_atomic(sg_page(sg)); + memcpy(vdev->kbuf, kaddr + sg->offset, len); + kunmap_atomic(kaddr); + + if (copy_to_user(uaddr, vdev->kbuf, len)) { + return -EFAULT; + } + uaddr += len; + } + } else { + if (copy_to_user(buf, scsi_sglist(cmd), vreq.data_len)) { + return -EFAULT; + } + } + } + + return ret; +} + +static ssize_t do_response (struct vhba_device *vdev, unsigned long metatag, struct scsi_cmnd *cmd, const char __user *buf, size_t buf_len, struct vhba_response *res) +{ + ssize_t ret = 0; + + scmd_dbg(cmd, "response %lu (%p), status %x, data len %d, sg count %d\n", + metatag, cmd, res->status, res->data_len, 
scsi_sg_count(cmd));
+    /* Non-zero status: the payload is sense data, clamped to SCSI_SENSE_BUFFERSIZE. */
+    if (res->status) {
+        if (res->data_len > SCSI_SENSE_BUFFERSIZE) {
+            scmd_dbg(cmd, "truncate sense (%d < %d)", SCSI_SENSE_BUFFERSIZE, res->data_len);
+            res->data_len = SCSI_SENSE_BUFFERSIZE;
+        }
+
+        if (copy_from_user(cmd->sense_buffer, buf, res->data_len)) {
+            return -EFAULT;
+        }
+
+        cmd->result = res->status;
+
+        ret += res->data_len;
+    } else if (DATA_FROM_DEVICE(cmd->sc_data_direction) && scsi_bufflen(cmd)) {
+        size_t to_read;
+        /* Never accept more data than the command's buffer can hold. */
+        if (res->data_len > scsi_bufflen(cmd)) {
+            scmd_dbg(cmd, "truncate data (%d < %d)\n", scsi_bufflen(cmd), res->data_len);
+            res->data_len = scsi_bufflen(cmd);
+        }
+
+        to_read = res->data_len;
+
+        if (scsi_sg_count(cmd)) {
+            unsigned char *kaddr, *uaddr;
+            struct scatterlist *sglist = scsi_sglist(cmd);
+            struct scatterlist *sg;
+            int i;
+
+            uaddr = (unsigned char *)buf;
+
+            for_each_sg(sglist, sg, scsi_sg_count(cmd), i) {
+                size_t len = (sg->length < to_read) ? sg->length : to_read;
+
+                if (len > vdev->kbuf_size) {
+                    scmd_dbg(cmd, "segment size (%zu) exceeds kbuf size (%zu)!", len, vdev->kbuf_size);
+                    len = vdev->kbuf_size;
+                }
+                /* Bounce via kbuf: the user copy happens before the page is atomically mapped. */
+                if (copy_from_user(vdev->kbuf, uaddr, len)) {
+                    return -EFAULT;
+                }
+                uaddr += len;
+
+                kaddr = kmap_atomic(sg_page(sg));
+                memcpy(kaddr + sg->offset, vdev->kbuf, len);
+                kunmap_atomic(kaddr);
+
+                to_read -= len;
+                if (to_read == 0) {
+                    break;
+                }
+            }
+        } else {
+            if (copy_from_user(scsi_sglist(cmd), buf, res->data_len)) {
+                return -EFAULT;
+            }
+
+            to_read -= res->data_len;
+        }
+        /* NOTE(review): the residual is relative to res->data_len, not to scsi_bufflen(cmd) -- confirm this is intended. */
+        scsi_set_resid(cmd, to_read);
+
+        ret += res->data_len - to_read;
+    }
+
+    return ret;
+}
+/*
+ * next_command - first command on vdev->cmd_list in state VHBA_REQ_PENDING,
+ * or NULL if there is none.  Callers in this file hold vdev->cmd_lock.
+ */
+static struct vhba_command *next_command (struct vhba_device *vdev)
+{
+    struct vhba_command *vcmd;
+
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->status == VHBA_REQ_PENDING) {
+            break;
+        }
+    }
+    /* The cursor aliasing the list head means the loop ran off the end. */
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+/*
+ * match_command - command on vdev->cmd_list whose metatag equals the given
+ * one, or NULL if there is none.
+ */
+static struct vhba_command *match_command (struct vhba_device *vdev, __u32 metatag)
+{
+    struct vhba_command *vcmd;
+
+
list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        if (vcmd->metatag == metatag) {
+            break;
+        }
+    }
+
+    /* The cursor aliasing the list head means no entry matched. */
+    if (&vcmd->entry == &vdev->cmd_list) {
+        vcmd = NULL;
+    }
+
+    return vcmd;
+}
+
+/*
+ * wait_command - sleep until a pending command is available.
+ *
+ * Must be called with vdev->cmd_lock held; flags is the caller's saved IRQ
+ * state (passed by value).  The lock is dropped around schedule() and held
+ * again on return.  The returned command is marked VHBA_REQ_READING.
+ * Returns NULL when a signal is pending and no command was found.
+ */
+static struct vhba_command *wait_command (struct vhba_device *vdev, unsigned long flags)
+{
+    struct vhba_command *vcmd;
+    DEFINE_WAIT(wait);
+
+    while (!(vcmd = next_command(vdev))) {
+        if (signal_pending(current)) {
+            break;
+        }
+
+        prepare_to_wait(&vdev->cmd_wq, &wait, TASK_INTERRUPTIBLE);
+
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        schedule();
+
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+    }
+
+    finish_wait(&vdev->cmd_wq, &wait);
+    if (vcmd) {
+        vcmd->status = VHBA_REQ_READING;
+    }
+
+    return vcmd;
+}
+
+/*
+ * vhba_ctl_read - read() on the control device: hand the next pending SCSI
+ * command to user space.
+ *
+ * With O_NONBLOCK, returns -EWOULDBLOCK when nothing is pending; otherwise
+ * sleeps interruptibly and returns -ERESTARTSYS when interrupted.  While the
+ * request is being copied out the command is in state VHBA_REQ_READING;
+ * afterwards it becomes VHBA_REQ_SENT on success, or goes back to
+ * VHBA_REQ_PENDING on failure.  Returns do_request()'s result.
+ */
+static ssize_t vhba_ctl_read (struct file *file, char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    ssize_t ret;
+    unsigned long flags;
+
+    vdev = file->private_data;
+
+    /* Get next command */
+    if (file->f_flags & O_NONBLOCK) {
+        /* Non-blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = next_command(vdev);
+        if (vcmd) {
+            /*
+             * Claim the command before dropping the lock, exactly as
+             * wait_command() does.  Left in VHBA_REQ_PENDING it could be
+             * picked up by a second reader, or freed by the abort path,
+             * while do_request() is still using it.
+             */
+            vcmd->status = VHBA_REQ_READING;
+        }
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -EWOULDBLOCK;
+        }
+    } else {
+        /* Blocking variant */
+        spin_lock_irqsave(&vdev->cmd_lock, flags);
+        vcmd = wait_command(vdev, flags);
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+        if (!vcmd) {
+            return -ERESTARTSYS;
+        }
+    }
+
+    ret = do_request(vdev, vcmd->metatag, vcmd->cmd, buf, buf_len);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+        vcmd->status = VHBA_REQ_SENT;
+        *offset += ret;
+    } else {
+        /* Copy-out failed: make the command available to the next read. */
+        vcmd->status = VHBA_REQ_PENDING;
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+
+/*
+ * vhba_ctl_write - write() on the control device: accept the response to a
+ * command previously handed out by vhba_ctl_read().
+ *
+ * The buffer starts with a struct vhba_response followed by its payload.
+ * Returns -EIO when the buffer is shorter than the header or when no
+ * command in state VHBA_REQ_SENT matches the response's metatag.
+ */
+static ssize_t vhba_ctl_write (struct file *file, const char __user *buf, size_t buf_len, loff_t *offset)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    struct vhba_response res;
+    ssize_t ret;
+    unsigned long flags;
+
+    if (buf_len < sizeof(res)) {
+        return
-EIO;
+    }
+
+    if (copy_from_user(&res, buf, sizeof(res))) {
+        return -EFAULT;
+    }
+
+    vdev = file->private_data;
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    vcmd = match_command(vdev, res.metatag);
+    if (!vcmd || vcmd->status != VHBA_REQ_SENT) {
+        spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+        pr_debug("ctl dev #%u not expecting response\n", vdev->num);
+        return -EIO;
+    }
+    vcmd->status = VHBA_REQ_WRITING; /* claimed while the payload is copied without the lock */
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    ret = do_response(vdev, vcmd->metatag, vcmd->cmd, buf + sizeof(res), buf_len - sizeof(res), &res);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (ret >= 0) {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
+        scsi_done(vcmd->cmd);
+#else
+        vcmd->cmd->scsi_done(vcmd->cmd);
+#endif
+        ret += sizeof(res); /* report header plus payload as consumed */
+
+        /* don't compete with vhba_device_dequeue */
+        if (!list_empty(&vcmd->entry)) {
+            list_del_init(&vcmd->entry);
+            vhba_free_command(vcmd);
+        }
+    } else {
+        vcmd->status = VHBA_REQ_SENT; /* copy failed: a later write may retry */
+    }
+
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return ret;
+}
+/*
+ * vhba_ctl_ioctl - ioctl() on the control device.
+ *
+ * 0xBEEF001 copies four unsigned ints {host number, channel, id, lun (0)}
+ * to the user pointer in arg; 0xBEEF002 copies the device number.  Both
+ * return 0 on success or -EFAULT; any other command returns -ENOTTY.
+ */
+static long vhba_ctl_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    struct vhba_device *vdev = file->private_data;
+    struct vhba_host *vhost = platform_get_drvdata(&vhba_platform_device);
+
+    switch (cmd) {
+        case 0xBEEF001: {
+            unsigned int ident[4]; /* host, channel, id, lun */
+
+            ident[0] = vhost->shost->host_no;
+            devnum_to_bus_and_id(vdev->num, &ident[1], &ident[2]);
+            ident[3] = 0; /* lun */
+
+            if (copy_to_user((void *) arg, ident, sizeof(ident))) {
+                return -EFAULT;
+            }
+
+            return 0;
+        }
+        case 0xBEEF002: {
+            unsigned int devnum = vdev->num;
+
+            if (copy_to_user((void *) arg, &devnum, sizeof(devnum))) {
+                return -EFAULT;
+            }
+
+            return 0;
+        }
+    }
+
+    return -ENOTTY;
+}
+/*
+ * vhba_ctl_compat_ioctl - compat entry point (CONFIG_COMPAT only): converts
+ * arg with compat_ptr() and forwards to vhba_ctl_ioctl().
+ */
+#ifdef CONFIG_COMPAT
+static long vhba_ctl_compat_ioctl (struct file *file, unsigned int cmd, unsigned long arg)
+{
+    unsigned long compat_arg = (unsigned long)compat_ptr(arg);
+    return vhba_ctl_ioctl(file, cmd, compat_arg);
+}
+#endif
+
+/*
+ * vhba_ctl_poll - poll() on the control device; reports POLLIN | POLLRDNORM
+ * when at least one command is in state VHBA_REQ_PENDING.
+ */
+static unsigned int vhba_ctl_poll (struct file *file, poll_table *wait)
+{
+    struct vhba_device *vdev = file->private_data;
+    unsigned int mask = 0;
+    unsigned long flags;
+
+    poll_wait(file, &vdev->cmd_wq, wait);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    if (next_command(vdev)) {
+        mask |= POLLIN | POLLRDNORM;
+    }
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    return mask;
+}
+
+/*
+ * vhba_ctl_open - open() on the control device: create a new virtual device.
+ *
+ * Allocates the device and its bounce buffer (kbuf) and registers the device
+ * with the host.  Returns 0 on success, -ENODEV when the platform driver has
+ * not been probed, -ENOMEM on allocation failure, or the error returned by
+ * vhba_add_device().  Nothing stays allocated on any failure path.
+ */
+static int vhba_ctl_open (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    int retval;
+
+    pr_debug("ctl dev open\n");
+
+    /* check if vhba is probed */
+    if (!platform_get_drvdata(&vhba_platform_device)) {
+        return -ENODEV;
+    }
+
+    vdev = vhba_device_alloc();
+    if (!vdev) {
+        return -ENOMEM;
+    }
+
+    vdev->kbuf_size = VHBA_KBUF_SIZE;
+    vdev->kbuf = kzalloc(vdev->kbuf_size, GFP_KERNEL);
+    if (!vdev->kbuf) {
+        /* Drop the allocation reference so the device is not leaked. */
+        vhba_device_put(vdev);
+        return -ENOMEM;
+    }
+
+    retval = vhba_add_device(vdev);
+    if (retval) {
+        /*
+         * release() is never called for a failed open, so the bounce
+         * buffer has to be freed here, before the final put.
+         */
+        kfree(vdev->kbuf);
+        vdev->kbuf = NULL;
+    } else {
+        file->private_data = vdev;
+    }
+
+    /* Drop the allocation reference; on success vhba_add_device() holds its own. */
+    vhba_device_put(vdev);
+
+    return retval;
+}
+
+/*
+ * vhba_ctl_release - last close of the control device: tear the virtual
+ * device down.
+ *
+ * Unregisters the device from the host, completes every command still on
+ * its list with DID_NO_CONNECT and returns those slots to the pool, then
+ * frees the bounce buffer.  Always returns 0.
+ */
+static int vhba_ctl_release (struct inode *inode, struct file *file)
+{
+    struct vhba_device *vdev;
+    struct vhba_command *vcmd;
+    unsigned long flags;
+
+    vdev = file->private_data;
+
+    pr_debug("ctl dev release\n");
+
+    /* Hold a reference across the teardown; vhba_remove_device() drops one. */
+    vhba_device_get(vdev);
+    vhba_remove_device(vdev);
+
+    spin_lock_irqsave(&vdev->cmd_lock, flags);
+    list_for_each_entry(vcmd, &vdev->cmd_list, entry) {
+        WARN_ON(vcmd->status == VHBA_REQ_READING || vcmd->status == VHBA_REQ_WRITING);
+
+        scmd_dbg(vcmd->cmd, "device released with command %lu (%p)\n", vcmd->metatag, vcmd->cmd);
+        vcmd->cmd->result = DID_NO_CONNECT << 16;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 16, 0)
+        scsi_done(vcmd->cmd);
+#else
+        vcmd->cmd->scsi_done(vcmd->cmd);
+#endif
+        vhba_free_command(vcmd);
+    }
+    /* All entries were handed back above; reset the list head in one go. */
+    INIT_LIST_HEAD(&vdev->cmd_list);
+    spin_unlock_irqrestore(&vdev->cmd_lock, flags);
+
+    kfree(vdev->kbuf);
+    vdev->kbuf = NULL;
+
+    vhba_device_put(vdev);
+
+    return 0;
+}
+
+/* File operations of the vhba_ctl misc device. */
+static struct file_operations vhba_ctl_fops = {
+    .owner = THIS_MODULE,
+    .open =
vhba_ctl_open,
+    .release = vhba_ctl_release,
+    .read = vhba_ctl_read,
+    .write = vhba_ctl_write,
+    .poll = vhba_ctl_poll,
+    .unlocked_ioctl = vhba_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+    .compat_ioctl = vhba_ctl_compat_ioctl,
+#endif
+};
+
+/* Control device node, registered with a dynamic minor under the name "vhba_ctl". */
+static struct miscdevice vhba_miscdev = {
+    .minor = MISC_DYNAMIC_MINOR,
+    .name = "vhba_ctl",
+    .fops = &vhba_ctl_fops,
+};
+
+/*
+ * vhba_probe - platform driver probe: create and register the SCSI host.
+ *
+ * Clamps vhba_can_queue to 1..256, allocates the Scsi_Host (struct vhba_host
+ * lives in its hostdata) and the command table, publishes the host as the
+ * platform device's driver data and adds the host to the SCSI midlayer.
+ * Returns 0 on success or a negative errno; everything allocated here is
+ * released again on failure.
+ */
+static int vhba_probe (struct platform_device *pdev)
+{
+    struct Scsi_Host *shost;
+    struct vhba_host *vhost;
+    int i;
+    int ret;
+
+    vhba_can_queue = clamp(vhba_can_queue, 1, 256);
+
+    shost = scsi_host_alloc(&vhba_template, sizeof(struct vhba_host));
+    if (!shost) {
+        return -ENOMEM;
+    }
+
+    shost->max_channel = VHBA_MAX_BUS-1;
+    shost->max_id = VHBA_MAX_ID;
+    /* we don't support lun > 0 */
+    shost->max_lun = 1;
+    shost->max_cmd_len = MAX_COMMAND_SIZE;
+    shost->can_queue = vhba_can_queue;
+    shost->cmd_per_lun = vhba_can_queue;
+
+    vhost = (struct vhba_host *)shost->hostdata;
+    memset(vhost, 0, sizeof(struct vhba_host));
+
+    vhost->shost = shost;
+    vhost->num_devices = 0;
+    spin_lock_init(&vhost->dev_lock);
+    spin_lock_init(&vhost->cmd_lock);
+    INIT_WORK(&vhost->scan_devices, vhba_scan_devices);
+    vhost->cmd_next = 0;
+    vhost->commands = kcalloc(vhba_can_queue, sizeof(struct vhba_command), GFP_KERNEL);
+    if (!vhost->commands) {
+        ret = -ENOMEM;
+        goto err_put_host;
+    }
+
+    for (i = 0; i < vhba_can_queue; i++) {
+        vhost->commands[i].status = VHBA_REQ_FREE;
+    }
+
+    platform_set_drvdata(pdev, vhost);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
+    ret = scsi_init_shared_tag_map(shost, vhba_can_queue);
+    if (ret) {
+        goto err_free_commands;
+    }
+#endif
+
+    if (scsi_add_host(shost, &pdev->dev)) {
+        ret = -ENOMEM;
+        goto err_free_commands;
+    }
+
+    return 0;
+
+err_free_commands:
+    /* vhba_ctl_open() treats non-NULL driver data as "probed"; undo that. */
+    platform_set_drvdata(pdev, NULL);
+    /* vhost is part of shost, so free the table before the host reference goes. */
+    kfree(vhost->commands);
+err_put_host:
+    scsi_host_put(shost);
+    return ret;
+}
+
+/*
+ * vhba_remove - platform driver remove: unregister the SCSI host and free
+ * the command table.  Returns int before kernel 6.11, void from 6.11 on.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0)
+static int vhba_remove (struct platform_device *pdev)
+#else
+static void vhba_remove (struct platform_device *pdev)
+#endif
+{
+    struct vhba_host *vhost;
+    struct Scsi_Host *shost;
+
+    vhost = platform_get_drvdata(pdev);
+    shost = vhost->shost;
+
+
scsi_remove_host(shost); + scsi_host_put(shost); + + kfree(vhost->commands); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 11, 0) + return 0; +#endif +} + +static void vhba_release (struct device * dev) +{ + return; +} + +static struct platform_device vhba_platform_device = { + .name = "vhba", + .id = -1, + .dev = { + .release = vhba_release, + }, +}; + +static struct platform_driver vhba_platform_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "vhba", + }, + .probe = vhba_probe, + .remove = vhba_remove, +}; + +static int __init vhba_init (void) +{ + int ret; + + ret = platform_device_register(&vhba_platform_device); + if (ret < 0) { + return ret; + } + + ret = platform_driver_register(&vhba_platform_driver); + if (ret < 0) { + platform_device_unregister(&vhba_platform_device); + return ret; + } + + ret = misc_register(&vhba_miscdev); + if (ret < 0) { + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); + return ret; + } + + return 0; +} + +static void __exit vhba_exit(void) +{ + misc_deregister(&vhba_miscdev); + platform_driver_unregister(&vhba_platform_driver); + platform_device_unregister(&vhba_platform_device); +} + +module_init(vhba_init); +module_exit(vhba_exit); + diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c index 9ab5ba9cf1d61c..ef8f355589a584 100644 --- a/drivers/soc/aspeed/aspeed-lpc-snoop.c +++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c @@ -166,7 +166,7 @@ static int aspeed_lpc_snoop_config_irq(struct aspeed_lpc_snoop *lpc_snoop, int rc; lpc_snoop->irq = platform_get_irq(pdev, 0); - if (!lpc_snoop->irq) + if (lpc_snoop->irq < 0) return -ENODEV; rc = devm_request_irq(dev, lpc_snoop->irq, @@ -200,11 +200,15 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, lpc_snoop->chan[channel].miscdev.minor = MISC_DYNAMIC_MINOR; lpc_snoop->chan[channel].miscdev.name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", DEVICE_NAME, channel); + if 
(!lpc_snoop->chan[channel].miscdev.name) { + rc = -ENOMEM; + goto err_free_fifo; + } lpc_snoop->chan[channel].miscdev.fops = &snoop_fops; lpc_snoop->chan[channel].miscdev.parent = dev; rc = misc_register(&lpc_snoop->chan[channel].miscdev); if (rc) - return rc; + goto err_free_fifo; /* Enable LPC snoop channel at requested port */ switch (channel) { @@ -221,7 +225,8 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, hicrb_en = HICRB_ENSNP1D; break; default: - return -EINVAL; + rc = -EINVAL; + goto err_misc_deregister; } regmap_update_bits(lpc_snoop->regmap, HICR5, hicr5_en, hicr5_en); @@ -231,6 +236,12 @@ static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, regmap_update_bits(lpc_snoop->regmap, HICRB, hicrb_en, hicrb_en); + return 0; + +err_misc_deregister: + misc_deregister(&lpc_snoop->chan[channel].miscdev); +err_free_fifo: + kfifo_free(&lpc_snoop->chan[channel].fifo); return rc; } diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index a3e88ced328a91..c9199d6fbe26ec 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -575,7 +575,7 @@ static int qcom_smp2p_probe(struct platform_device *pdev) smp2p->mbox_client.knows_txdone = true; smp2p->mbox_chan = mbox_request_channel(&smp2p->mbox_client, 0); if (IS_ERR(smp2p->mbox_chan)) { - if (PTR_ERR(smp2p->mbox_chan) != -ENODEV) + if (PTR_ERR(smp2p->mbox_chan) != -ENOENT) return PTR_ERR(smp2p->mbox_chan); smp2p->mbox_chan = NULL; diff --git a/drivers/soc/samsung/exynos-usi.c b/drivers/soc/samsung/exynos-usi.c index c5661ac19f7b3a..5f7bdf3bab05cc 100644 --- a/drivers/soc/samsung/exynos-usi.c +++ b/drivers/soc/samsung/exynos-usi.c @@ -233,7 +233,7 @@ static void exynos_usi_unconfigure(void *data) /* Make sure that we've stopped providing the clock to USI IP */ val = readl(usi->regs + USI_OPTION); val &= ~USI_OPTION_CLKREQ_ON; - val |= ~USI_OPTION_CLKSTOP_ON; + val |= USI_OPTION_CLKSTOP_ON; writel(val, usi->regs + USI_OPTION); /* Set USI block state to 
reset */ diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 6f8a20014e76d4..39aecd34c6414b 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -122,6 +122,10 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, set_bit(SDW_GROUP13_DEV_NUM, bus->assigned); set_bit(SDW_MASTER_DEV_NUM, bus->assigned); + ret = sdw_irq_create(bus, fwnode); + if (ret) + return ret; + /* * SDW is an enumerable bus, but devices can be powered off. So, * they won't be able to report as present. @@ -138,6 +142,7 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, if (ret < 0) { dev_err(bus->dev, "Finding slaves failed:%d\n", ret); + sdw_irq_delete(bus); return ret; } @@ -156,10 +161,6 @@ int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, bus->params.curr_bank = SDW_BANK0; bus->params.next_bank = SDW_BANK1; - ret = sdw_irq_create(bus, fwnode); - if (ret) - return ret; - return 0; } EXPORT_SYMBOL(sdw_bus_master_add); diff --git a/drivers/soundwire/generic_bandwidth_allocation.c b/drivers/soundwire/generic_bandwidth_allocation.c index 1cfaccf43eac50..c18f0c16f92973 100644 --- a/drivers/soundwire/generic_bandwidth_allocation.c +++ b/drivers/soundwire/generic_bandwidth_allocation.c @@ -204,6 +204,13 @@ static void _sdw_compute_port_params(struct sdw_bus *bus, port_bo = 1; list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) { + /* + * Only runtimes with CONFIGURED, PREPARED, ENABLED, and DISABLED + * states should be included in the bandwidth calculation. 
+ */ + if (m_rt->stream->state > SDW_STREAM_DISABLED || + m_rt->stream->state < SDW_STREAM_CONFIGURED) + continue; sdw_compute_master_ports(m_rt, &params[i], &port_bo, hstop); } diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 5ea6399e6c9b01..10a602d4843ae8 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -353,9 +353,6 @@ static int intel_link_probe(struct auxiliary_device *auxdev, /* use generic bandwidth allocation algorithm */ sdw->cdns.bus.compute_params = sdw_compute_params; - /* avoid resuming from pm_runtime suspend if it's not required */ - dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - ret = sdw_bus_master_add(bus, dev, dev->fwnode); if (ret) { dev_err(dev, "sdw_bus_master_add fail: %d\n", ret); @@ -640,7 +637,10 @@ static int __maybe_unused intel_suspend(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { + /* Prevent runtime PM from racing with the code below. */ + pm_runtime_disable(dev); + + if (pm_runtime_status_suspended(dev)) { dev_dbg(dev, "pm_runtime status: suspended\n"); clock_stop_quirks = sdw->link_res->clock_stop_quirks; @@ -648,7 +648,7 @@ static int __maybe_unused intel_suspend(struct device *dev) if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) || !clock_stop_quirks) { - if (pm_runtime_suspended(dev->parent)) { + if (pm_runtime_status_suspended(dev->parent)) { /* * paranoia check: this should not happen with the .prepare * resume to full power @@ -715,7 +715,6 @@ static int __maybe_unused intel_resume(struct device *dev) struct sdw_cdns *cdns = dev_get_drvdata(dev); struct sdw_intel *sdw = cdns_to_intel(cdns); struct sdw_bus *bus = &cdns->bus; - int link_flags; int ret; if (bus->prop.hw_disabled || !sdw->startup_done) { @@ -724,23 +723,6 @@ static int __maybe_unused intel_resume(struct device *dev) return 0; } - if (pm_runtime_suspended(dev)) { - dev_dbg(dev, "pm_runtime status was suspended, forcing active\n"); - - /* follow 
required sequence from runtime_pm.rst */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_mark_last_busy(dev); - pm_runtime_enable(dev); - - pm_runtime_resume(bus->dev); - - link_flags = md_flags >> (bus->link_id * 8); - - if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE)) - pm_runtime_idle(dev); - } - ret = sdw_intel_link_power_up(sdw); if (ret) { dev_err(dev, "%s failed: %d\n", __func__, ret); @@ -760,6 +742,14 @@ static int __maybe_unused intel_resume(struct device *dev) return ret; } + /* + * Runtime PM has been disabled in intel_suspend(), so set the status + * to active because the device has just been resumed and re-enable + * runtime PM. + */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + /* * after system resume, the pm_runtime suspend() may kick in * during the enumeration, before any children device force the diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index 244ac010686298..e7b61dc4ce6766 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -1436,22 +1436,17 @@ static int atmel_qspi_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, 500); pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - pm_runtime_get_noresume(&pdev->dev); + devm_pm_runtime_set_active_enabled(&pdev->dev); + devm_pm_runtime_get_noresume(&pdev->dev); err = atmel_qspi_init(aq); if (err) goto dma_release; err = spi_register_controller(ctrl); - if (err) { - pm_runtime_put_noidle(&pdev->dev); - pm_runtime_disable(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_dont_use_autosuspend(&pdev->dev); + if (err) goto dma_release; - } + pm_runtime_mark_last_busy(&pdev->dev); pm_runtime_put_autosuspend(&pdev->dev); @@ -1530,10 +1525,6 @@ static void atmel_qspi_remove(struct platform_device *pdev) */ dev_warn(&pdev->dev, "Failed to resume device on remove\n"); } - - pm_runtime_disable(&pdev->dev); - 
pm_runtime_dont_use_autosuspend(&pdev->dev); - pm_runtime_put_noidle(&pdev->dev); } static int __maybe_unused atmel_qspi_suspend(struct device *dev) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 644b44d2aef24e..18261cbd413b49 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -745,7 +745,7 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); - reset = devm_reset_control_get_optional_exclusive(dev, NULL); + reset = devm_reset_control_get_optional_shared(dev, NULL); if (IS_ERR(reset)) return PTR_ERR(reset); diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index c8f64ec69344af..b56210734caafc 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -523,7 +523,7 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) return PTR_ERR(clk); } - reset = devm_reset_control_get_optional_exclusive(dev, NULL); + reset = devm_reset_control_get_optional_shared(dev, NULL); if (IS_ERR(reset)) return PTR_ERR(reset); diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 067c954cb6ea03..863781ba6c1601 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // // Copyright 2013 Freescale Semiconductor, Inc. -// Copyright 2020 NXP +// Copyright 2020-2025 NXP // // Freescale DSPI driver // This file contains a driver for the Freescale DSPI @@ -62,6 +62,7 @@ #define SPI_SR_TFIWF BIT(18) #define SPI_SR_RFDF BIT(17) #define SPI_SR_CMDFFF BIT(16) +#define SPI_SR_TXRXS BIT(30) #define SPI_SR_CLEAR (SPI_SR_TCFQF | \ SPI_SR_TFUF | SPI_SR_TFFF | \ SPI_SR_CMDTCF | SPI_SR_SPEF | \ @@ -921,9 +922,20 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, struct spi_transfer *transfer; bool cs = false; int status = 0; + u32 val = 0; + bool cs_change = false; message->actual_length = 0; + /* Put DSPI in running mode if halted. 
*/ + regmap_read(dspi->regmap, SPI_MCR, &val); + if (val & SPI_MCR_HALT) { + regmap_update_bits(dspi->regmap, SPI_MCR, SPI_MCR_HALT, 0); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + !(val & SPI_SR_TXRXS)) + ; + } + list_for_each_entry(transfer, &message->transfers, transfer_list) { dspi->cur_transfer = transfer; dspi->cur_msg = message; @@ -953,6 +965,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi->tx_cmd |= SPI_PUSHR_CMD_CONT; } + cs_change = transfer->cs_change; dspi->tx = transfer->tx_buf; dspi->rx = transfer->rx_buf; dspi->len = transfer->len; @@ -962,6 +975,8 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF, SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF); + regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); + spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer, dspi->progress, !dspi->irq); @@ -988,6 +1003,15 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, dspi_deassert_cs(spi, &cs); } + if (status || !cs_change) { + /* Put DSPI in stop mode */ + regmap_update_bits(dspi->regmap, SPI_MCR, + SPI_MCR_HALT, SPI_MCR_HALT); + while (regmap_read(dspi->regmap, SPI_SR, &val) >= 0 && + val & SPI_SR_TXRXS) + ; + } + message->status = status; spi_finalize_current_message(ctlr); @@ -1167,6 +1191,20 @@ static int dspi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(dspi_pm, dspi_suspend, dspi_resume); +static const struct regmap_range dspi_yes_ranges[] = { + regmap_reg_range(SPI_MCR, SPI_MCR), + regmap_reg_range(SPI_TCR, SPI_CTAR(3)), + regmap_reg_range(SPI_SR, SPI_TXFR3), + regmap_reg_range(SPI_RXFR0, SPI_RXFR3), + regmap_reg_range(SPI_CTARE(0), SPI_CTARE(3)), + regmap_reg_range(SPI_SREX, SPI_SREX), +}; + +static const struct regmap_access_table dspi_access_table = { + .yes_ranges = dspi_yes_ranges, + .n_yes_ranges = ARRAY_SIZE(dspi_yes_ranges), +}; + static const struct regmap_range dspi_volatile_ranges[] = { regmap_reg_range(SPI_MCR, SPI_TCR), 
regmap_reg_range(SPI_SR, SPI_SR), @@ -1184,6 +1222,8 @@ static const struct regmap_config dspi_regmap_config = { .reg_stride = 4, .max_register = 0x88, .volatile_table = &dspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }; static const struct regmap_range dspi_xspi_volatile_ranges[] = { @@ -1205,6 +1245,8 @@ static const struct regmap_config dspi_xspi_regmap_config[] = { .reg_stride = 4, .max_register = 0x13c, .volatile_table = &dspi_xspi_volatile_table, + .rd_table = &dspi_access_table, + .wr_table = &dspi_access_table, }, { .name = "pushr", @@ -1227,6 +1269,8 @@ static int dspi_init(struct fsl_dspi *dspi) if (!spi_controller_is_target(dspi->ctlr)) mcr |= SPI_MCR_HOST; + mcr |= SPI_MCR_HALT; + regmap_write(dspi->regmap, SPI_MCR, mcr); regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 5c59fddb32c1b9..b5ecffcaf7955e 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -949,24 +949,20 @@ static int fsl_qspi_probe(struct platform_device *pdev) ret = devm_add_action_or_reset(dev, fsl_qspi_cleanup, q); if (ret) - goto err_destroy_mutex; + goto err_put_ctrl; ret = devm_spi_register_controller(dev, ctlr); if (ret) - goto err_destroy_mutex; + goto err_put_ctrl; return 0; -err_destroy_mutex: - mutex_destroy(&q->lock); - err_disable_clk: fsl_qspi_clk_disable_unprep(q); err_put_ctrl: spi_controller_put(ctlr); - dev_err(dev, "Freescale QuadSPI probe failed\n"); return ret; } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 832d6e9009ebe7..c93d80a4d734ec 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1695,9 +1695,12 @@ static int spi_imx_transfer_one(struct spi_controller *controller, struct spi_device *spi, struct spi_transfer *transfer) { + int ret; struct spi_imx_data *spi_imx = spi_controller_get_devdata(spi->controller); - spi_imx_setupxfer(spi, transfer); + ret = spi_imx_setupxfer(spi, transfer); + if 
(ret < 0) + return ret; transfer->effective_speed_hz = spi_imx->spi_bus_clk; /* flush rxfifo before transfer */ diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index 31a878d9458d95..7740f94847a883 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -420,7 +420,7 @@ MODULE_LICENSE("GPL"); static void spi_test_print_hex_dump(char *pre, const void *ptr, size_t len) { /* limit the hex_dump */ - if (len < 1024) { + if (len <= 1024) { print_hex_dump(KERN_INFO, pre, DUMP_PREFIX_OFFSET, 16, 1, ptr, len, 0); diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c index a31a1db07aa4d2..5db0639d3b0159 100644 --- a/drivers/spi/spi-mem.c +++ b/drivers/spi/spi-mem.c @@ -596,7 +596,11 @@ u64 spi_mem_calc_op_duration(struct spi_mem_op *op) ns_per_cycles = 1000000000 / op->max_freq; ncycles += ((op->cmd.nbytes * 8) / op->cmd.buswidth) / (op->cmd.dtr ? 2 : 1); ncycles += ((op->addr.nbytes * 8) / op->addr.buswidth) / (op->addr.dtr ? 2 : 1); - ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + + /* Dummy bytes are optional for some SPI flash memory operations */ + if (op->dummy.nbytes) + ncycles += ((op->dummy.nbytes * 8) / op->dummy.buswidth) / (op->dummy.dtr ? 2 : 1); + ncycles += ((op->data.nbytes * 8) / op->data.buswidth) / (op->data.dtr ? 2 : 1); return ncycles * ns_per_cycles; diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 29c616e2c408cf..70bb74b3bd9c32 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -134,6 +134,7 @@ struct omap2_mcspi { size_t max_xfer_len; u32 ref_clk_hz; bool use_multi_mode; + bool last_msg_kept_cs; }; struct omap2_mcspi_cs { @@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, * multi-mode is applicable. 
*/ mcspi->use_multi_mode = true; + + if (mcspi->last_msg_kept_cs) + mcspi->use_multi_mode = false; + list_for_each_entry(tr, &msg->transfers, transfer_list) { if (!tr->bits_per_word) bits_per_word = msg->spi->bits_per_word; @@ -1287,18 +1292,19 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, mcspi->use_multi_mode = false; } - /* Check if transfer asks to change the CS status after the transfer */ - if (!tr->cs_change) - mcspi->use_multi_mode = false; - - /* - * If at least one message is not compatible, switch back to single mode - * - * The bits_per_word of certain transfer can be different, but it will have no - * impact on the signal itself. - */ - if (!mcspi->use_multi_mode) - break; + if (list_is_last(&tr->transfer_list, &msg->transfers)) { + /* Check if transfer asks to keep the CS status after the whole message */ + if (tr->cs_change) { + mcspi->use_multi_mode = false; + mcspi->last_msg_kept_cs = true; + } else { + mcspi->last_msg_kept_cs = false; + } + } else { + /* Check if transfer asks to change the CS status after the transfer */ + if (!tr->cs_change) + mcspi->use_multi_mode = false; + } } omap2_mcspi_set_mode(ctlr); diff --git a/drivers/spi/spi-qpic-snand.c b/drivers/spi/spi-qpic-snand.c index 17eb67e1913261..44a8f58e46fe12 100644 --- a/drivers/spi/spi-qpic-snand.c +++ b/drivers/spi/spi-qpic-snand.c @@ -142,7 +142,7 @@ static void qcom_spi_set_read_loc_first(struct qcom_nand_controller *snandc, else if (reg == NAND_READ_LOCATION_1) snandc->regs->read_location1 = locreg_val; else if (reg == NAND_READ_LOCATION_2) - snandc->regs->read_location1 = locreg_val; + snandc->regs->read_location2 = locreg_val; else if (reg == NAND_READ_LOCATION_3) snandc->regs->read_location3 = locreg_val; } @@ -250,9 +250,11 @@ static const struct mtd_ooblayout_ops qcom_spi_ooblayout = { static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) { struct qcom_nand_controller *snandc = nand_to_qcom_snand(nand); + struct nand_ecc_props *reqs = 
&nand->ecc.requirements; + struct nand_ecc_props *user = &nand->ecc.user_conf; struct nand_ecc_props *conf = &nand->ecc.ctx.conf; struct mtd_info *mtd = nanddev_to_mtd(nand); - int cwperpage, bad_block_byte; + int cwperpage, bad_block_byte, ret; struct qpic_ecc *ecc_cfg; cwperpage = mtd->writesize / NANDC_STEP_SIZE; @@ -261,11 +263,39 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg = kzalloc(sizeof(*ecc_cfg), GFP_KERNEL); if (!ecc_cfg) return -ENOMEM; - snandc->qspi->oob_buf = kzalloc(mtd->writesize + mtd->oobsize, + + if (user->step_size && user->strength) { + ecc_cfg->step_size = user->step_size; + ecc_cfg->strength = user->strength; + } else if (reqs->step_size && reqs->strength) { + ecc_cfg->step_size = reqs->step_size; + ecc_cfg->strength = reqs->strength; + } else { + /* use defaults */ + ecc_cfg->step_size = NANDC_STEP_SIZE; + ecc_cfg->strength = 4; + } + + if (ecc_cfg->step_size != NANDC_STEP_SIZE) { + dev_err(snandc->dev, + "only %u bytes ECC step size is supported\n", + NANDC_STEP_SIZE); + ret = -EOPNOTSUPP; + goto err_free_ecc_cfg; + } + + if (ecc_cfg->strength != 4) { + dev_err(snandc->dev, + "only 4 bits ECC strength is supported\n"); + ret = -EOPNOTSUPP; + goto err_free_ecc_cfg; + } + + snandc->qspi->oob_buf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); if (!snandc->qspi->oob_buf) { - kfree(ecc_cfg); - return -ENOMEM; + ret = -ENOMEM; + goto err_free_ecc_cfg; } memset(snandc->qspi->oob_buf, 0xff, mtd->writesize + mtd->oobsize); @@ -280,8 +310,6 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) ecc_cfg->bytes = ecc_cfg->ecc_bytes_hw + ecc_cfg->spare_bytes + ecc_cfg->bbm_size; ecc_cfg->steps = 4; - ecc_cfg->strength = 4; - ecc_cfg->step_size = 512; ecc_cfg->cw_data = 516; ecc_cfg->cw_size = ecc_cfg->cw_data + ecc_cfg->bytes; bad_block_byte = mtd->writesize - ecc_cfg->cw_size * (cwperpage - 1) + 1; @@ -339,6 +367,10 @@ static int qcom_spi_ecc_init_ctx_pipelined(struct nand_device *nand) 
ecc_cfg->strength, ecc_cfg->step_size); return 0; + +err_free_ecc_cfg: + kfree(ecc_cfg); + return ret; } static void qcom_spi_ecc_cleanup_ctx_pipelined(struct nand_device *nand) @@ -1307,8 +1339,7 @@ static int qcom_spi_send_cmdaddr(struct qcom_nand_controller *snandc, snandc->qspi->addr1 = cpu_to_le32(s_op.addr1_reg << 16); snandc->qspi->addr2 = cpu_to_le32(s_op.addr2_reg); snandc->qspi->cmd = cpu_to_le32(cmd); - qcom_spi_block_erase(snandc); - return 0; + return qcom_spi_block_erase(snandc); default: break; } diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 8a98c313548e37..7d8a7998f8ae73 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -918,6 +918,7 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr, void *rx_buf = t->rx_buf; unsigned int len = t->len; unsigned int bits = t->bits_per_word; + unsigned int max_wdlen = 256; unsigned int bytes_per_word; unsigned int words; int n; @@ -931,17 +932,17 @@ static int sh_msiof_transfer_one(struct spi_controller *ctlr, if (!spi_controller_is_target(p->ctlr)) sh_msiof_spi_set_clk_regs(p, t); + if (tx_buf) + max_wdlen = min(max_wdlen, p->tx_fifo_size); + if (rx_buf) + max_wdlen = min(max_wdlen, p->rx_fifo_size); + while (ctlr->dma_tx && len > 15) { /* * DMA supports 32-bit words only, hence pack 8-bit and 16-bit * words, with byte resp. word swapping. 
*/ - unsigned int l = 0; - - if (tx_buf) - l = min(round_down(len, 4), p->tx_fifo_size * 4); - if (rx_buf) - l = min(round_down(len, 4), p->rx_fifo_size * 4); + unsigned int l = min(round_down(len, 4), max_wdlen * 4); if (bits <= 8) { copy32 = copy_bswap32; diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 668022098b1eac..9ec9823409cc3d 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -960,6 +960,10 @@ static int stm32_ospi_probe(struct platform_device *pdev) err_pm_enable: pm_runtime_force_suspend(ospi->dev); mutex_destroy(&ospi->lock); + if (ospi->dma_chtx) + dma_release_channel(ospi->dma_chtx); + if (ospi->dma_chrx) + dma_release_channel(ospi->dma_chrx); return ret; } diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index fcbe864c9b7d69..aa92fd5a35a98f 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -264,6 +264,9 @@ static int sun4i_spi_transfer_one(struct spi_controller *host, else reg |= SUN4I_CTL_DHB; + /* Now that the settings are correct, enable the interface */ + reg |= SUN4I_CTL_ENABLE; + sun4i_spi_write(sspi, SUN4I_CTL_REG, reg); /* Ensure that we have a parent clock fast enough */ @@ -404,7 +407,7 @@ static int sun4i_spi_runtime_resume(struct device *dev) } sun4i_spi_write(sspi, SUN4I_CTL_REG, - SUN4I_CTL_ENABLE | SUN4I_CTL_MASTER | SUN4I_CTL_TP); + SUN4I_CTL_MASTER | SUN4I_CTL_TP); return 0; @@ -462,6 +465,7 @@ static int sun4i_spi_probe(struct platform_device *pdev) sspi->host = host; host->max_speed_hz = 100 * 1000 * 1000; host->min_speed_hz = 3 * 1000; + host->use_gpio_descriptors = true; host->set_cs = sun4i_spi_set_cs; host->transfer_one = sun4i_spi_transfer_one; host->num_chipselect = 4; diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 3822d7c8d8edb9..795a8482c2c700 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -728,9 +728,9 @@ static int tegra_spi_set_hw_cs_timing(struct spi_device *spi) u32 
inactive_cycles; u8 cs_state; - if (setup->unit != SPI_DELAY_UNIT_SCK || - hold->unit != SPI_DELAY_UNIT_SCK || - inactive->unit != SPI_DELAY_UNIT_SCK) { + if ((setup->value && setup->unit != SPI_DELAY_UNIT_SCK) || + (hold->value && hold->unit != SPI_DELAY_UNIT_SCK) || + (inactive->value && inactive->unit != SPI_DELAY_UNIT_SCK)) { dev_err(&spi->dev, "Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n", SPI_DELAY_UNIT_SCK); diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 08e49a8768943c..665c06e1473beb 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -134,7 +134,7 @@ #define QSPI_COMMAND_VALUE_SET(X) (((x) & 0xFF) << 0) #define QSPI_CMB_SEQ_CMD_CFG 0x1a0 -#define QSPI_COMMAND_X1_X2_X4(x) (((x) & 0x3) << 13) +#define QSPI_COMMAND_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13) #define QSPI_COMMAND_X1_X2_X4_MASK (0x03 << 13) #define QSPI_COMMAND_SDR_DDR BIT(12) #define QSPI_COMMAND_SIZE_SET(x) (((x) & 0xFF) << 0) @@ -147,7 +147,7 @@ #define QSPI_ADDRESS_VALUE_SET(X) (((x) & 0xFFFF) << 0) #define QSPI_CMB_SEQ_ADDR_CFG 0x1ac -#define QSPI_ADDRESS_X1_X2_X4(x) (((x) & 0x3) << 13) +#define QSPI_ADDRESS_X1_X2_X4(x) ((((x) >> 1) & 0x3) << 13) #define QSPI_ADDRESS_X1_X2_X4_MASK (0x03 << 13) #define QSPI_ADDRESS_SDR_DDR BIT(12) #define QSPI_ADDRESS_SIZE_SET(x) (((x) & 0xFF) << 0) @@ -1036,10 +1036,6 @@ static u32 tegra_qspi_addr_config(bool is_ddr, u8 bus_width, u8 len) { u32 addr_config = 0; - /* Extract Address configuration and value */ - is_ddr = 0; //Only SDR mode supported - bus_width = 0; //X1 mode - if (is_ddr) addr_config |= QSPI_ADDRESS_SDR_DDR; else @@ -1079,13 +1075,13 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, switch (transfer_phase) { case CMD_TRANSFER: /* X1 SDR mode */ - cmd_config = tegra_qspi_cmd_config(false, 0, + cmd_config = tegra_qspi_cmd_config(false, xfer->tx_nbits, xfer->len); cmd_value = *((const u8 *)(xfer->tx_buf)); break; case ADDR_TRANSFER: /* X1 SDR mode 
*/ - addr_config = tegra_qspi_addr_config(false, 0, + addr_config = tegra_qspi_addr_config(false, xfer->tx_nbits, xfer->len); address_value = *((const u32 *)(xfer->tx_buf)); break; @@ -1117,9 +1113,9 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, (&tqspi->xfer_completion, QSPI_DMA_TIMEOUT); - if (WARN_ON(ret == 0)) { - dev_err(tqspi->dev, "QSPI Transfer failed with timeout: %d\n", - ret); + if (WARN_ON_ONCE(ret == 0)) { + dev_err_ratelimited(tqspi->dev, + "QSPI Transfer failed with timeout\n"); if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX)) dmaengine_terminate_all @@ -1163,26 +1159,22 @@ static int tegra_qspi_combined_seq_xfer(struct tegra_qspi *tqspi, ret = -EIO; goto exit; } - if (!xfer->cs_change) { - tegra_qspi_transfer_end(spi); - spi_transfer_delay_exec(xfer); - } break; default: ret = -EINVAL; goto exit; } msg->actual_length += xfer->len; + if (!xfer->cs_change && transfer_phase == DATA_TRANSFER) { + tegra_qspi_transfer_end(spi); + spi_transfer_delay_exec(xfer); + } transfer_phase++; } ret = 0; exit: msg->status = ret; - if (ret < 0) { - tegra_qspi_transfer_end(spi); - spi_transfer_delay_exec(xfer); - } return ret; } diff --git a/drivers/staging/axis-fifo/axis-fifo.c b/drivers/staging/axis-fifo/axis-fifo.c index 7540c20090c78b..351f983ef9149b 100644 --- a/drivers/staging/axis-fifo/axis-fifo.c +++ b/drivers/staging/axis-fifo/axis-fifo.c @@ -393,16 +393,14 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, bytes_available = ioread32(fifo->base_addr + XLLF_RLR_OFFSET); if (!bytes_available) { - dev_err(fifo->dt_device, "received a packet of length 0 - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet of length 0\n"); ret = -EIO; goto end_unlock; } if (bytes_available > len) { - dev_err(fifo->dt_device, "user read buffer too small (available bytes=%zu user buffer bytes=%zu) - fifo core will be reset\n", + dev_err(fifo->dt_device, "user read buffer too 
small (available bytes=%zu user buffer bytes=%zu)\n", bytes_available, len); - reset_ip_core(fifo); ret = -EINVAL; goto end_unlock; } @@ -411,8 +409,7 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, /* this probably can't happen unless IP * registers were previously mishandled */ - dev_err(fifo->dt_device, "received a packet that isn't word-aligned - fifo core will be reset\n"); - reset_ip_core(fifo); + dev_err(fifo->dt_device, "received a packet that isn't word-aligned\n"); ret = -EIO; goto end_unlock; } @@ -433,7 +430,6 @@ static ssize_t axis_fifo_read(struct file *f, char __user *buf, if (copy_to_user(buf + copied * sizeof(u32), tmp_buf, copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } @@ -542,7 +538,6 @@ static ssize_t axis_fifo_write(struct file *f, const char __user *buf, if (copy_from_user(tmp_buf, buf + copied * sizeof(u32), copy * sizeof(u32))) { - reset_ip_core(fifo); ret = -EFAULT; goto end_unlock; } @@ -775,9 +770,6 @@ static int axis_fifo_parse_dt(struct axis_fifo *fifo) goto end; } - /* IP sets TDFV to fifo depth - 4 so we will do the same */ - fifo->tx_fifo_depth -= 4; - ret = get_dts_property(fifo, "xlnx,use-rx-data", &fifo->has_rx_fifo); if (ret) { dev_err(fifo->dt_device, "missing xlnx,use-rx-data property\n"); diff --git a/drivers/staging/gpib/ines/ines_gpib.c b/drivers/staging/gpib/ines/ines_gpib.c index d93eb05dab9038..8e2375d8ddac24 100644 --- a/drivers/staging/gpib/ines/ines_gpib.c +++ b/drivers/staging/gpib/ines/ines_gpib.c @@ -1484,7 +1484,7 @@ static void __exit ines_exit_module(void) gpib_unregister_driver(&ines_pci_unaccel_interface); gpib_unregister_driver(&ines_pci_accel_interface); gpib_unregister_driver(&ines_isa_interface); -#ifdef GPIB__PCMCIA +#ifdef CONFIG_GPIB_PCMCIA gpib_unregister_driver(&ines_pcmcia_interface); gpib_unregister_driver(&ines_pcmcia_unaccel_interface); gpib_unregister_driver(&ines_pcmcia_accel_interface); diff --git a/drivers/staging/gpib/uapi/gpib_user.h 
b/drivers/staging/gpib/uapi/gpib_user.h index 5ff4588686fde3..0fd32fb9e7a64d 100644 --- a/drivers/staging/gpib/uapi/gpib_user.h +++ b/drivers/staging/gpib/uapi/gpib_user.h @@ -178,7 +178,7 @@ static inline uint8_t MTA(unsigned int addr) static inline uint8_t MSA(unsigned int addr) { - return gpib_address_restrict(addr) | SAD; + return (addr & 0x1f) | SAD; } static inline uint8_t PPE_byte(unsigned int dio_line, int sense) diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c index 6c14d7bcdd6750..081b17f498638b 100644 --- a/drivers/staging/iio/adc/ad7816.c +++ b/drivers/staging/iio/adc/ad7816.c @@ -136,7 +136,7 @@ static ssize_t ad7816_store_mode(struct device *dev, struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct ad7816_chip_info *chip = iio_priv(indio_dev); - if (strcmp(buf, "full")) { + if (strcmp(buf, "full") == 0) { gpiod_set_value(chip->rdwr_pin, 1); chip->mode = AD7816_FULL; } else { diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index f9bef5173bf25c..a9bfd5305410c2 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -213,8 +213,14 @@ static int rkvdec_enum_framesizes(struct file *file, void *priv, if (!fmt) return -EINVAL; - fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; - fsize->stepwise = fmt->frmsize; + fsize->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + fsize->stepwise.min_width = 1; + fsize->stepwise.max_width = fmt->frmsize.max_width; + fsize->stepwise.step_width = 1; + fsize->stepwise.min_height = 1; + fsize->stepwise.max_height = fmt->frmsize.max_height; + fsize->stepwise.step_height = 1; + return 0; } diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index b839b50ac26a54..fa7ea4ca4c36f4 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -1900,6 +1900,7 @@ 
static int bcm2835_mmal_probe(struct vchiq_device *device) __func__, ret); goto free_dev; } + dev->v4l2_dev.dev = &device->dev; /* setup v4l controls */ ret = bcm2835_mmal_init_controls(dev, &dev->ctrl_handler); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1244ef3aa86c1d..620ba6e0ab0756 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4263,8 +4263,8 @@ int iscsit_close_connection( spin_unlock(&iscsit_global->ts_bitmap_lock); iscsit_stop_timers_for_cmds(conn); - iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + iscsit_stop_nopin_response_timer(conn); if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index a55aaa8cef422f..2097aae3994676 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -485,7 +485,7 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_REQ) }, { PCI_DEVICE_DATA(INTEL, LNLM_THERMAL, PROC_THERMAL_FEATURE_MSI_SUPPORT | - PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | + PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) }, { PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_DLVR | @@ -495,8 +495,9 @@ static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | 
PROC_THERMAL_FEATURE_WT_REQ) }, { PCI_DEVICE_DATA(INTEL, PTL_THERMAL, PROC_THERMAL_FEATURE_RAPL | - PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_MSI_SUPPORT | - PROC_THERMAL_FEATURE_WT_HINT | PROC_THERMAL_FEATURE_POWER_FLOOR) }, + PROC_THERMAL_FEATURE_DLVR | PROC_THERMAL_FEATURE_DVFS | + PROC_THERMAL_FEATURE_MSI_SUPPORT | PROC_THERMAL_FEATURE_WT_HINT | + PROC_THERMAL_FEATURE_POWER_FLOOR) }, { }, }; diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index dad63f2d5f90fc..3a028b78d9afc0 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -166,15 +166,18 @@ static const struct mmio_reg adl_dvfs_mmio_regs[] = { { 0, 0x5A40, 1, 0x1, 0}, /* rfi_disable */ }; +static const struct mapping_table *dlvr_mapping; +static const struct mmio_reg *dlvr_mmio_regs_table; + #define RFIM_SHOW(suffix, table)\ static ssize_t suffix##_show(struct device *dev,\ struct device_attribute *attr,\ char *buf)\ {\ - const struct mapping_table *mapping = NULL;\ + const struct mmio_reg *mmio_regs = dlvr_mmio_regs_table;\ + const struct mapping_table *mapping = dlvr_mapping;\ struct proc_thermal_device *proc_priv;\ struct pci_dev *pdev = to_pci_dev(dev);\ - const struct mmio_reg *mmio_regs;\ const char **match_strs;\ int ret, err;\ u32 reg_val;\ @@ -186,12 +189,6 @@ static ssize_t suffix##_show(struct device *dev,\ mmio_regs = adl_dvfs_mmio_regs;\ } else if (table == 2) { \ match_strs = (const char **)dlvr_strings;\ - if (pdev->device == PCI_DEVICE_ID_INTEL_LNLM_THERMAL) {\ - mmio_regs = lnl_dlvr_mmio_regs;\ - mapping = lnl_dlvr_mapping;\ - } else {\ - mmio_regs = dlvr_mmio_regs;\ - } \ } else {\ match_strs = (const char **)fivr_strings;\ mmio_regs = tgl_fivr_mmio_regs;\ @@ -214,12 +211,12 @@ static ssize_t suffix##_store(struct device *dev,\ struct device_attribute *attr,\ const char *buf, size_t count)\ {\ - 
const struct mapping_table *mapping = NULL;\ + const struct mmio_reg *mmio_regs = dlvr_mmio_regs_table;\ + const struct mapping_table *mapping = dlvr_mapping;\ struct proc_thermal_device *proc_priv;\ struct pci_dev *pdev = to_pci_dev(dev);\ unsigned int input;\ const char **match_strs;\ - const struct mmio_reg *mmio_regs;\ int ret, err;\ u32 reg_val;\ u32 mask;\ @@ -230,12 +227,6 @@ static ssize_t suffix##_store(struct device *dev,\ mmio_regs = adl_dvfs_mmio_regs;\ } else if (table == 2) { \ match_strs = (const char **)dlvr_strings;\ - if (pdev->device == PCI_DEVICE_ID_INTEL_LNLM_THERMAL) {\ - mmio_regs = lnl_dlvr_mmio_regs;\ - mapping = lnl_dlvr_mapping;\ - } else {\ - mmio_regs = dlvr_mmio_regs;\ - } \ } else {\ match_strs = (const char **)fivr_strings;\ mmio_regs = tgl_fivr_mmio_regs;\ @@ -448,6 +439,16 @@ int proc_thermal_rfim_add(struct pci_dev *pdev, struct proc_thermal_device *proc } if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR) { + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_LNLM_THERMAL: + case PCI_DEVICE_ID_INTEL_PTL_THERMAL: + dlvr_mmio_regs_table = lnl_dlvr_mmio_regs; + dlvr_mapping = lnl_dlvr_mapping; + break; + default: + dlvr_mmio_regs_table = dlvr_mmio_regs; + break; + } ret = sysfs_create_group(&pdev->dev.kobj, &dlvr_attribute_group); if (ret) return ret; diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 496abf8e55e0d5..2841d14914b710 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -329,6 +329,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) tj_max = intel_tcc_get_tjmax(cpu); if (tj_max < 0) return tj_max; + tj_max *= 1000; zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL); if (!zonedev) diff --git a/drivers/thermal/mediatek/lvts_thermal.c b/drivers/thermal/mediatek/lvts_thermal.c index 088481d91e6e29..985925147ac068 100644 --- a/drivers/thermal/mediatek/lvts_thermal.c +++ 
b/drivers/thermal/mediatek/lvts_thermal.c @@ -213,6 +213,13 @@ static const struct debugfs_reg32 lvts_regs[] = { LVTS_DEBUG_FS_REGS(LVTS_CLKEN), }; +static void lvts_debugfs_exit(void *data) +{ + struct lvts_domain *lvts_td = data; + + debugfs_remove_recursive(lvts_td->dom_dentry); +} + static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td) { struct debugfs_regset32 *regset; @@ -245,12 +252,7 @@ static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td) debugfs_create_regset32("registers", 0400, dentry, regset); } - return 0; -} - -static void lvts_debugfs_exit(struct lvts_domain *lvts_td) -{ - debugfs_remove_recursive(lvts_td->dom_dentry); + return devm_add_action_or_reset(dev, lvts_debugfs_exit, lvts_td); } #else @@ -261,8 +263,6 @@ static inline int lvts_debugfs_init(struct device *dev, return 0; } -static void lvts_debugfs_exit(struct lvts_domain *lvts_td) { } - #endif static int lvts_raw_to_temp(u32 raw_temp, int temp_factor) @@ -1374,8 +1374,6 @@ static void lvts_remove(struct platform_device *pdev) for (i = 0; i < lvts_td->num_lvts_ctrl; i++) lvts_ctrl_set_enable(&lvts_td->lvts_ctrl[i], false); - - lvts_debugfs_exit(lvts_td); } static const struct lvts_ctrl_data mt7988_lvts_ap_data_ctrl[] = { diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index cd15e84c47f475..1db2e951b53fac 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -151,6 +151,11 @@ static void tb_cfg_request_dequeue(struct tb_cfg_request *req) struct tb_ctl *ctl = req->ctl; mutex_lock(&ctl->request_queue_lock); + if (!test_bit(TB_CFG_REQUEST_ACTIVE, &req->flags)) { + mutex_unlock(&ctl->request_queue_lock); + return; + } + list_del(&req->list); clear_bit(TB_CFG_REQUEST_ACTIVE, &req->flags); if (test_bit(TB_CFG_REQUEST_CANCELED, &req->flags)) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index e51d01671d8e7c..3e96f1afd4268e 100644 --- a/drivers/thunderbolt/usb4.c +++ 
b/drivers/thunderbolt/usb4.c @@ -440,10 +440,10 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) bool configured = val & PORT_CS_19_PC; usb4 = port->usb4; - if (((flags & TB_WAKE_ON_CONNECT) | + if (((flags & TB_WAKE_ON_CONNECT) && device_may_wakeup(&usb4->dev)) && !configured) val |= PORT_CS_19_WOC; - if (((flags & TB_WAKE_ON_DISCONNECT) | + if (((flags & TB_WAKE_ON_DISCONNECT) && device_may_wakeup(&usb4->dev)) && configured) val |= PORT_CS_19_WOD; if ((flags & TB_WAKE_ON_USB4) && configured) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 2a0ce11f405d2d..72ae08d6204ff4 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1173,16 +1173,6 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) return 0; } - sg_init_table(&sg, 1); - ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); - if (ret != 1) { - serial8250_clear_THRI(p); - return 0; - } - - dma->tx_size = sg_dma_len(&sg); - if (priv->habit & OMAP_DMA_TX_KICK) { unsigned char c; u8 tx_lvl; @@ -1207,18 +1197,22 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) ret = -EBUSY; goto err; } - if (dma->tx_size < 4) { + if (kfifo_len(&tport->xmit_fifo) < 4) { ret = -EINVAL; goto err; } - if (!kfifo_get(&tport->xmit_fifo, &c)) { + if (!uart_fifo_out(&p->port, &c, 1)) { ret = -EINVAL; goto err; } skip_byte = c; - /* now we need to recompute due to kfifo_get */ - kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, - UART_XMIT_SIZE, dma->tx_addr); + } + + sg_init_table(&sg, 1); + ret = kfifo_dma_out_prepare_mapped(&tport->xmit_fifo, &sg, 1, UART_XMIT_SIZE, dma->tx_addr); + if (ret != 1) { + ret = -EINVAL; + goto err; } desc = dmaengine_prep_slave_sg(dma->txchan, &sg, 1, DMA_MEM_TO_DEV, @@ -1228,6 +1222,7 @@ static int omap_8250_tx_dma(struct uart_8250_port *p) goto err; } + dma->tx_size = sg_dma_len(&sg); dma->tx_running = 1; desc->callback = 
omap_8250_dma_tx_complete; diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c index ce0fef7e2c665c..be2f130696b3a0 100644 --- a/drivers/tty/serial/jsm/jsm_tty.c +++ b/drivers/tty/serial/jsm/jsm_tty.c @@ -451,6 +451,7 @@ int jsm_uart_port_init(struct jsm_board *brd) if (!brd->channels[i]) continue; + brd->channels[i]->uart_port.dev = &brd->pci_dev->dev; brd->channels[i]->uart_port.irq = brd->irq; brd->channels[i]->uart_port.uartclk = 14745600; brd->channels[i]->uart_port.type = PORT_JSM; diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c index 059bea18dbab56..4e47dca2c4ed9c 100644 --- a/drivers/tty/serial/milbeaut_usio.c +++ b/drivers/tty/serial/milbeaut_usio.c @@ -523,7 +523,10 @@ static int mlb_usio_probe(struct platform_device *pdev) } port->membase = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - + if (!port->membase) { + ret = -ENOMEM; + goto failed; + } ret = platform_get_irq_byname(pdev, "rx"); mlb_usio_irq[index][RX] = ret; diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 1b137e06844425..3449945493ceb4 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -1746,6 +1746,12 @@ msm_serial_early_console_setup_dm(struct earlycon_device *device, if (!device->port.membase) return -ENODEV; + /* Disable DM / single-character modes */ + msm_write(&device->port, 0, UARTDM_DMEN); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_RX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_CMD_RESET_TX, MSM_UART_CR); + msm_write(&device->port, MSM_UART_CR_TX_ENABLE, MSM_UART_CR); + device->con->write = msm_serial_early_write_dm; return 0; } diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 5904a2d4cefa71..054a8e630aceac 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -563,8 +563,11 @@ static void sifive_serial_break_ctl(struct uart_port *port, int break_state) static int 
sifive_serial_startup(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_enable_rxwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); return 0; } @@ -572,9 +575,12 @@ static int sifive_serial_startup(struct uart_port *port) static void sifive_serial_shutdown(struct uart_port *port) { struct sifive_serial_port *ssp = port_to_sifive_serial_port(port); + unsigned long flags; + uart_port_lock_irqsave(&ssp->port, &flags); __ssp_disable_rxwm(ssp); __ssp_disable_txwm(ssp); + uart_port_unlock_irqrestore(&ssp->port, flags); } /** diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 0bd6544e30a6b3..791e2f1f7c0b65 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -193,13 +193,12 @@ int set_selection_user(const struct tiocl_selection __user *sel, return -EFAULT; /* - * TIOCL_SELCLEAR, TIOCL_SELPOINTER and TIOCL_SELMOUSEREPORT are OK to - * use without CAP_SYS_ADMIN as they do not modify the selection. + * TIOCL_SELCLEAR and TIOCL_SELPOINTER are OK to use without + * CAP_SYS_ADMIN as they do not modify the selection. 
*/ switch (v.sel_mode) { case TIOCL_SELCLEAR: case TIOCL_SELPOINTER: - case TIOCL_SELMOUSEREPORT: break; default: if (!capable(CAP_SYS_ADMIN)) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 4b91072f3a4e91..1f2bdd2e1cc593 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -1103,8 +1103,6 @@ long vt_compat_ioctl(struct tty_struct *tty, case VT_WAITACTIVE: case VT_RELDISP: case VT_DISALLOCATE: - case VT_RESIZE: - case VT_RESIZEX: return vt_ioctl(tty, cmd, arg); /* diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 240ce135bbfbc3..1e50675772febb 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -674,16 +674,8 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) int tag = scsi_cmd_to_rq(cmd)->tag; struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct ufs_hw_queue *hwq; - unsigned long flags; int err; - if (!ufshcd_cmd_inflight(lrbp->cmd)) { - dev_err(hba->dev, - "%s: skip abort. cmd at tag %d already completed.\n", - __func__, tag); - return FAILED; - } - /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", @@ -692,6 +684,11 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) } hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) { + dev_err(hba->dev, "%s: skip abort. 
cmd at tag %d already completed.\n", + __func__, tag); + return FAILED; + } if (ufshcd_mcq_sqe_search(hba, hwq, tag)) { /* @@ -715,10 +712,5 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) return FAILED; } - spin_lock_irqsave(&hwq->cq_lock, flags); - if (ufshcd_cmd_inflight(lrbp->cmd)) - ufshcd_release_scsi_cmd(hba, lrbp); - spin_unlock_irqrestore(&hwq->cq_lock, flags); - return SUCCESS; } diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 90b5ab60f5ae4a..634cf163f4cb10 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -466,6 +466,56 @@ static ssize_t critical_health_show(struct device *dev, return sysfs_emit(buf, "%d\n", hba->critical_health_count); } +static ssize_t device_lvl_exception_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (hba->dev_info.wspecversion < 0x410) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", atomic_read(&hba->dev_lvl_exception_count)); +} + +static ssize_t device_lvl_exception_count_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + unsigned int value; + + if (kstrtouint(buf, 0, &value)) + return -EINVAL; + + /* the only supported usecase is to reset the dev_lvl_exception_count */ + if (value) + return -EINVAL; + + atomic_set(&hba->dev_lvl_exception_count, 0); + + return count; +} + +static ssize_t device_lvl_exception_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + u64 exception_id; + int err; + + ufshcd_rpm_get_sync(hba); + err = ufshcd_read_device_lvl_exception_id(hba, &exception_id); + ufshcd_rpm_put_sync(hba); + + if (err) + return err; + + hba->dev_lvl_exception_id = exception_id; + return sysfs_emit(buf, "%llu\n", exception_id); +} + static DEVICE_ATTR_RW(rpm_lvl); static DEVICE_ATTR_RO(rpm_target_dev_state); static 
DEVICE_ATTR_RO(rpm_target_link_state); @@ -479,6 +529,8 @@ static DEVICE_ATTR_RW(wb_flush_threshold); static DEVICE_ATTR_RW(rtc_update_ms); static DEVICE_ATTR_RW(pm_qos_enable); static DEVICE_ATTR_RO(critical_health); +static DEVICE_ATTR_RW(device_lvl_exception_count); +static DEVICE_ATTR_RO(device_lvl_exception_id); static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rpm_lvl.attr, @@ -494,6 +546,8 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rtc_update_ms.attr, &dev_attr_pm_qos_enable.attr, &dev_attr_critical_health.attr, + &dev_attr_device_lvl_exception_count.attr, + &dev_attr_device_lvl_exception_id.attr, NULL }; diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 10b4a19a70f10e..d0a2c963a27d37 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -94,6 +94,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, enum query_opcode desc_op); int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable); +int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id); /* Wrapper functions for safely calling variant operations */ static inline const char *ufshcd_get_var_name(struct ufs_hba *hba) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0534390c2a35d0..04f769d907a446 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -278,6 +278,7 @@ static const struct ufs_dev_quirk ufs_fixups[] = { .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | + UFS_DEVICE_QUIRK_PA_HIBER8TIME | UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS }, { .wmanufacturerid = UFS_VENDOR_SKHYNIX, .model = UFS_ANY_MODEL, @@ -3176,16 +3177,10 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, int err; retry: - time_left = wait_for_completion_timeout(hba->dev_cmd.complete, + time_left = wait_for_completion_timeout(&hba->dev_cmd.complete, time_left); if (likely(time_left)) { - /* - * The 
completion handler called complete() and the caller of - * this function still owns the @lrbp tag so the code below does - * not trigger any race conditions. - */ - hba->dev_cmd.complete = NULL; err = ufshcd_get_tr_ocs(lrbp, NULL); if (!err) err = ufshcd_dev_cmd_completion(hba, lrbp); @@ -3199,7 +3194,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, /* successfully cleared the command, retry if needed */ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) err = -EAGAIN; - hba->dev_cmd.complete = NULL; return err; } @@ -3215,11 +3209,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) { - hba->dev_cmd.complete = NULL; + if (pending) __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); - } spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3237,8 +3229,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) - hba->dev_cmd.complete = NULL; spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3272,13 +3262,9 @@ static void ufshcd_dev_man_unlock(struct ufs_hba *hba) static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, const u32 tag, int timeout) { - DECLARE_COMPLETION_ONSTACK(wait); int err; - hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); @@ -5585,12 +5571,12 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ scsi_done(cmd); - } else if (hba->dev_cmd.complete) { + } else { if (cqe) { ocs = le32_to_cpu(cqe->status) & MASK_OCS; lrbp->utr_descriptor_ptr->header.ocs = ocs; } - 
complete(hba->dev_cmd.complete); + complete(&hba->dev_cmd.complete); } } @@ -5692,6 +5678,8 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba, continue; hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); + if (!hwq) + continue; if (force_compl) { ufshcd_mcq_compl_all_cqes_lock(hba, hwq); @@ -6013,6 +6001,42 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } +int ufshcd_read_device_lvl_exception_id(struct ufs_hba *hba, u64 *exception_id) +{ + struct utp_upiu_query_v4_0 *upiu_resp; + struct ufs_query_req *request = NULL; + struct ufs_query_res *response = NULL; + int err; + + if (hba->dev_info.wspecversion < 0x410) + return -EOPNOTSUPP; + + ufshcd_hold(hba); + mutex_lock(&hba->dev_cmd.lock); + + ufshcd_init_query(hba, &request, &response, + UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID, 0, 0); + + request->query_func = UPIU_QUERY_FUNC_STANDARD_READ_REQUEST; + + err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_QUERY, QUERY_REQ_TIMEOUT); + + if (err) { + dev_err(hba->dev, "%s: failed to read device level exception %d\n", + __func__, err); + goto out; + } + + upiu_resp = (struct utp_upiu_query_v4_0 *)response; + *exception_id = get_unaligned_be64(&upiu_resp->osf3); +out: + mutex_unlock(&hba->dev_cmd.lock); + ufshcd_release(hba); + + return err; +} + static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { u8 index; @@ -6083,7 +6107,7 @@ int ufshcd_wb_toggle_buf_flush(struct ufs_hba *hba, bool enable) return ret; } -static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, +static bool ufshcd_wb_curr_buff_threshold_check(struct ufs_hba *hba, u32 avail_buf) { u32 cur_buf; @@ -6165,15 +6189,13 @@ static bool ufshcd_wb_need_flush(struct ufs_hba *hba) } /* - * The ufs device needs the vcc to be ON to flush. * With user-space reduction enabled, it's enough to enable flush * by checking only the available buffer. The threshold * defined here is > 90% full. 
* With user-space preserved enabled, the current-buffer * should be checked too because the wb buffer size can reduce * when disk tends to be full. This info is provided by current - * buffer (dCurrentWriteBoosterBufferSize). There's no point in - * keeping vcc on when current buffer is empty. + * buffer (dCurrentWriteBoosterBufferSize). */ index = ufshcd_wb_get_query_index(hba); ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, @@ -6188,7 +6210,7 @@ static bool ufshcd_wb_need_flush(struct ufs_hba *hba) if (!hba->dev_info.b_presrv_uspc_en) return avail_buf <= UFS_WB_BUF_REMAIN_PERCENT(10); - return ufshcd_wb_presrv_usrspc_keep_vcc_on(hba, avail_buf); + return ufshcd_wb_curr_buff_threshold_check(hba, avail_buf); } static void ufshcd_rpm_dev_flush_recheck_work(struct work_struct *work) @@ -6240,6 +6262,11 @@ static void ufshcd_exception_event_handler(struct work_struct *work) sysfs_notify(&hba->dev->kobj, NULL, "critical_health"); } + if (status & hba->ee_drv_mask & MASK_EE_DEV_LVL_EXCEPTION) { + atomic_inc(&hba->dev_lvl_exception_count); + sysfs_notify(&hba->dev->kobj, NULL, "device_lvl_exception_count"); + } + ufs_debugfs_exception_event(hba, status); } @@ -6560,9 +6587,14 @@ static void ufshcd_err_handler(struct work_struct *work) up(&hba->host_sem); return; } - ufshcd_set_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); + ufshcd_err_handling_prepare(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_set_eh_in_progress(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba, false); spin_lock_irqsave(hba->host->host_lock, flags); @@ -7238,8 +7270,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } - ufshcd_add_query_upiu_trace(hba, err ? 
UFS_QUERY_ERR : UFS_QUERY_COMP, - (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); return err; } @@ -8139,6 +8169,22 @@ static void ufshcd_temp_notif_probe(struct ufs_hba *hba, const u8 *desc_buf) } } +static void ufshcd_device_lvl_exception_probe(struct ufs_hba *hba, u8 *desc_buf) +{ + u32 ext_ufs_feature; + + if (hba->dev_info.wspecversion < 0x410) + return; + + ext_ufs_feature = get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + if (!(ext_ufs_feature & UFS_DEV_LVL_EXCEPTION_SUP)) + return; + + atomic_set(&hba->dev_lvl_exception_count, 0); + ufshcd_enable_ee(hba, MASK_EE_DEV_LVL_EXCEPTION); +} + static void ufshcd_set_rtt(struct ufs_hba *hba) { struct ufs_dev_info *dev_info = &hba->dev_info; @@ -8339,6 +8385,8 @@ static int ufs_get_device_desc(struct ufs_hba *hba) ufs_init_rtc(hba, desc_buf); + ufshcd_device_lvl_exception_probe(hba, desc_buf); + /* * ufshcd_read_string_desc returns size of the string * reset the error value @@ -8428,6 +8476,31 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) return ret; } +/** + * ufshcd_quirk_override_pa_h8time - Ensures proper adjustment of PA_HIBERN8TIME. + * @hba: per-adapter instance + * + * Some UFS devices require specific adjustments to the PA_HIBERN8TIME parameter + * to ensure proper hibernation timing. This function retrieves the current + * PA_HIBERN8TIME value and increments it by 100us. 
+ */ +static void ufshcd_quirk_override_pa_h8time(struct ufs_hba *hba) +{ + u32 pa_h8time; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_HIBERN8TIME), &pa_h8time); + if (ret) { + dev_err(hba->dev, "Failed to get PA_HIBERN8TIME: %d\n", ret); + return; + } + + /* Increment by 1 to increase hibernation time by 100 µs */ + ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME), pa_h8time + 1); + if (ret) + dev_err(hba->dev, "Failed updating PA_HIBERN8TIME: %d\n", ret); +} + static void ufshcd_tune_unipro_params(struct ufs_hba *hba) { ufshcd_vops_apply_dev_quirks(hba); @@ -8438,6 +8511,9 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_HIBER8TIME) + ufshcd_quirk_override_pa_h8time(hba); } static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) @@ -10490,6 +10566,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE); + init_completion(&hba->dev_cmd.complete); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index d7539cda97da50..3e545af536e53e 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -34,7 +34,7 @@ * Exynos's Vendor specific registers for UFSHCI */ #define HCI_TXPRDT_ENTRY_SIZE 0x00 -#define PRDT_PREFECT_EN BIT(31) +#define PRDT_PREFETCH_EN BIT(31) #define HCI_RXPRDT_ENTRY_SIZE 0x04 #define HCI_1US_TO_CNT_VAL 0x0C #define CNT_VAL_1US_MASK 0x3FF @@ -92,11 +92,16 @@ UIC_TRANSPORT_NO_CONNECTION_RX |\ UIC_TRANSPORT_BAD_TC) -/* FSYS UFS Shareability */ -#define UFS_WR_SHARABLE BIT(2) -#define UFS_RD_SHARABLE BIT(1) -#define UFS_SHARABLE (UFS_WR_SHARABLE | UFS_RD_SHARABLE) -#define UFS_SHAREABILITY_OFFSET 0x710 +/* UFS Shareability */ +#define UFS_EXYNOSAUTO_WR_SHARABLE BIT(2) +#define UFS_EXYNOSAUTO_RD_SHARABLE 
BIT(1) +#define UFS_EXYNOSAUTO_SHARABLE (UFS_EXYNOSAUTO_WR_SHARABLE | \ + UFS_EXYNOSAUTO_RD_SHARABLE) +#define UFS_GS101_WR_SHARABLE BIT(1) +#define UFS_GS101_RD_SHARABLE BIT(0) +#define UFS_GS101_SHARABLE (UFS_GS101_WR_SHARABLE | \ + UFS_GS101_RD_SHARABLE) +#define UFS_SHAREABILITY_OFFSET 0x710 /* Multi-host registers */ #define MHCTRL 0xC4 @@ -209,8 +214,8 @@ static int exynos_ufs_shareability(struct exynos_ufs *ufs) /* IO Coherency setting */ if (ufs->sysreg) { return regmap_update_bits(ufs->sysreg, - ufs->shareability_reg_offset, - UFS_SHARABLE, UFS_SHARABLE); + ufs->iocc_offset, + ufs->iocc_mask, ufs->iocc_val); } return 0; @@ -957,6 +962,12 @@ static int exynos_ufs_phy_init(struct exynos_ufs *ufs) } phy_set_bus_width(generic_phy, ufs->avail_ln_rx); + + if (generic_phy->power_count) { + phy_power_off(generic_phy); + phy_exit(generic_phy); + } + ret = phy_init(generic_phy); if (ret) { dev_err(hba->dev, "%s: phy init failed, ret = %d\n", @@ -1049,9 +1060,14 @@ static int exynos_ufs_pre_link(struct ufs_hba *hba) exynos_ufs_config_intr(ufs, DFES_DEF_L4_ERRS, UNIPRO_L4); exynos_ufs_set_unipro_pclk_div(ufs); + exynos_ufs_setup_clocks(hba, true, PRE_CHANGE); + /* unipro */ exynos_ufs_config_unipro(ufs); + if (ufs->drv_data->pre_link) + ufs->drv_data->pre_link(ufs); + /* m-phy */ exynos_ufs_phy_init(ufs); if (!(ufs->opts & EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR)) { @@ -1059,11 +1075,6 @@ static int exynos_ufs_pre_link(struct ufs_hba *hba) exynos_ufs_config_phy_cap_attr(ufs); } - exynos_ufs_setup_clocks(hba, true, PRE_CHANGE); - - if (ufs->drv_data->pre_link) - ufs->drv_data->pre_link(ufs); - return 0; } @@ -1087,12 +1098,17 @@ static int exynos_ufs_post_link(struct ufs_hba *hba) struct exynos_ufs *ufs = ufshcd_get_variant(hba); struct phy *generic_phy = ufs->phy; struct exynos_ufs_uic_attr *attr = ufs->drv_data->uic_attr; + u32 val = ilog2(DATA_UNIT_SIZE); exynos_ufs_establish_connt(ufs); exynos_ufs_fit_aggr_timeout(ufs); hci_writel(ufs, 0xa, HCI_DATA_REORDER); - 
hci_writel(ufs, ilog2(DATA_UNIT_SIZE), HCI_TXPRDT_ENTRY_SIZE); + + if (hba->caps & UFSHCD_CAP_CRYPTO) + val |= PRDT_PREFETCH_EN; + hci_writel(ufs, val, HCI_TXPRDT_ENTRY_SIZE); + hci_writel(ufs, ilog2(DATA_UNIT_SIZE), HCI_RXPRDT_ENTRY_SIZE); hci_writel(ufs, (1 << hba->nutrs) - 1, HCI_UTRL_NEXUS_TYPE); hci_writel(ufs, (1 << hba->nutmrs) - 1, HCI_UTMRL_NEXUS_TYPE); @@ -1168,12 +1184,22 @@ static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs) ufs->sysreg = NULL; else { if (of_property_read_u32_index(np, "samsung,sysreg", 1, - &ufs->shareability_reg_offset)) { + &ufs->iocc_offset)) { dev_warn(dev, "can't get an offset from sysreg. Set to default value\n"); - ufs->shareability_reg_offset = UFS_SHAREABILITY_OFFSET; + ufs->iocc_offset = UFS_SHAREABILITY_OFFSET; } } + ufs->iocc_mask = ufs->drv_data->iocc_mask; + /* + * no 'dma-coherent' property means the descriptors are + * non-cacheable so iocc shareability should be disabled. + */ + if (of_dma_is_coherent(dev->of_node)) + ufs->iocc_val = ufs->iocc_mask; + else + ufs->iocc_val = 0; + ufs->pclk_avail_min = PCLK_AVAIL_MIN; ufs->pclk_avail_max = PCLK_AVAIL_MAX; @@ -1497,6 +1523,14 @@ static int exynos_ufs_init(struct ufs_hba *hba) return ret; } +static void exynos_ufs_exit(struct ufs_hba *hba) +{ + struct exynos_ufs *ufs = ufshcd_get_variant(hba); + + phy_power_off(ufs->phy); + phy_exit(ufs->phy); +} + static int exynos_ufs_host_reset(struct ufs_hba *hba) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -1667,6 +1701,12 @@ static void exynos_ufs_hibern8_notify(struct ufs_hba *hba, } } +static int gs101_ufs_suspend(struct exynos_ufs *ufs) +{ + hci_writel(ufs, 0 << 0, HCI_GPIO_OUT); + return 0; +} + static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, enum ufs_notify_change_status status) { @@ -1675,6 +1715,9 @@ static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, if (status == PRE_CHANGE) return 0; + if (ufs->drv_data->suspend) + ufs->drv_data->suspend(ufs); + 
if (!ufshcd_is_link_active(hba)) phy_power_off(ufs->phy); @@ -1952,6 +1995,7 @@ static int gs101_ufs_pre_pwr_change(struct exynos_ufs *ufs, static const struct ufs_hba_variant_ops ufs_hba_exynos_ops = { .name = "exynos_ufs", .init = exynos_ufs_init, + .exit = exynos_ufs_exit, .hce_enable_notify = exynos_ufs_hce_enable_notify, .link_startup_notify = exynos_ufs_link_startup_notify, .pwr_change_notify = exynos_ufs_pwr_change_notify, @@ -1990,13 +2034,7 @@ static int exynos_ufs_probe(struct platform_device *pdev) static void exynos_ufs_remove(struct platform_device *pdev) { - struct ufs_hba *hba = platform_get_drvdata(pdev); - struct exynos_ufs *ufs = ufshcd_get_variant(hba); - ufshcd_pltfrm_remove(pdev); - - phy_power_off(ufs->phy); - phy_exit(ufs->phy); } static struct exynos_ufs_uic_attr exynos7_uic_attr = { @@ -2035,6 +2073,7 @@ static const struct exynos_ufs_drv_data exynosauto_ufs_drvs = { .opts = EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL | EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR | EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX, + .iocc_mask = UFS_EXYNOSAUTO_SHARABLE, .drv_init = exynosauto_ufs_drv_init, .post_hce_enable = exynosauto_ufs_post_hce_enable, .pre_link = exynosauto_ufs_pre_link, @@ -2136,10 +2175,12 @@ static const struct exynos_ufs_drv_data gs101_ufs_drvs = { .opts = EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR | EXYNOS_UFS_OPT_UFSPR_SECURE | EXYNOS_UFS_OPT_TIMER_TICK_SELECT, + .iocc_mask = UFS_GS101_SHARABLE, .drv_init = gs101_ufs_drv_init, .pre_link = gs101_ufs_pre_link, .post_link = gs101_ufs_post_link, .pre_pwr_change = gs101_ufs_pre_pwr_change, + .suspend = gs101_ufs_suspend, }; static const struct of_device_id exynos_ufs_of_match[] = { diff --git a/drivers/ufs/host/ufs-exynos.h b/drivers/ufs/host/ufs-exynos.h index aac5172761899a..abe7e472759e94 100644 --- a/drivers/ufs/host/ufs-exynos.h +++ b/drivers/ufs/host/ufs-exynos.h @@ -181,6 +181,7 @@ struct exynos_ufs_drv_data { struct exynos_ufs_uic_attr *uic_attr; unsigned int quirks; unsigned int opts; + u32 iocc_mask; /* SoC's 
specific operations */ int (*drv_init)(struct exynos_ufs *ufs); int (*pre_link)(struct exynos_ufs *ufs); @@ -191,6 +192,7 @@ struct exynos_ufs_drv_data { const struct ufs_pa_layer_attr *pwr); int (*pre_hce_enable)(struct exynos_ufs *ufs); int (*post_hce_enable)(struct exynos_ufs *ufs); + int (*suspend)(struct exynos_ufs *ufs); }; struct ufs_phy_time_cfg { @@ -230,7 +232,9 @@ struct exynos_ufs { ktime_t entry_hibern8_t; const struct exynos_ufs_drv_data *drv_data; struct regmap *sysreg; - u32 shareability_reg_offset; + u32 iocc_offset; + u32 iocc_mask; + u32 iocc_val; u32 opts; #define EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL BIT(0) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 1b37449fbffc50..31649f908dd466 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -33,6 +33,10 @@ ((((c) >> 16) & MCQ_QCFGPTR_MASK) * MCQ_QCFGPTR_UNIT) #define MCQ_QCFG_SIZE 0x40 +/* De-emphasis for gear-5 */ +#define DEEMPHASIS_3_5_dB 0x04 +#define NO_DEEMPHASIS 0x0 + enum { TSTBUS_UAWM, TSTBUS_UARM, @@ -99,7 +103,9 @@ static const struct __ufs_qcom_bw_table { }; static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host); -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq); +static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba, + unsigned long freq, char *name); +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq); static struct ufs_qcom_host *rcdev_to_ufs_host(struct reset_controller_dev *rcd) { @@ -448,10 +454,9 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba) if (ret) return ret; - if (phy->power_count) { + if (phy->power_count) phy_power_off(phy); - phy_exit(phy); - } + /* phy initialization - calibrate the phy */ ret = phy_init(phy); @@ -598,7 +603,7 @@ static int ufs_qcom_link_startup_notify(struct ufs_hba *hba, return -EINVAL; } - err = ufs_qcom_set_core_clk_ctrl(hba, ULONG_MAX); + err = ufs_qcom_set_core_clk_ctrl(hba, 
true, ULONG_MAX); if (err) dev_err(hba->dev, "cfg core clk ctrl failed\n"); /* @@ -795,6 +800,23 @@ static int ufs_qcom_icc_update_bw(struct ufs_qcom_host *host) return ufs_qcom_icc_set_bw(host, bw_table.mem_bw, bw_table.cfg_bw); } +static void ufs_qcom_set_tx_hs_equalizer(struct ufs_hba *hba, u32 gear, u32 tx_lanes) +{ + u32 equalizer_val; + int ret, i; + + /* Determine the equalizer value based on the gear */ + equalizer_val = (gear == 5) ? DEEMPHASIS_3_5_dB : NO_DEEMPHASIS; + + for (i = 0; i < tx_lanes; i++) { + ret = ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(TX_HS_EQUALIZER, i), + equalizer_val); + if (ret) + dev_err(hba->dev, "%s: failed equalizer lane %d\n", + __func__, i); + } +} + static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, const struct ufs_pa_layer_attr *dev_max_params, @@ -846,6 +868,11 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, dev_req_params->gear_tx, PA_INITIAL_ADAPT); } + + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING) + ufs_qcom_set_tx_hs_equalizer(hba, + dev_req_params->gear_tx, dev_req_params->lane_tx); + break; case POST_CHANGE: if (ufs_qcom_cfg_timers(hba, false)) { @@ -893,6 +920,16 @@ static int ufs_qcom_quirk_host_pa_saveconfigtime(struct ufs_hba *hba) (pa_vs_config_reg1 | (1 << 12))); } +static void ufs_qcom_override_pa_tx_hsg1_sync_len(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TX_HSG1_SYNC_LENGTH), + PA_TX_HSG1_SYNC_LENGTH_VAL); + if (err) + dev_err(hba->dev, "Failed (%d) set PA_TX_HSG1_SYNC_LENGTH\n", err); +} + static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) { int err = 0; @@ -900,6 +937,9 @@ static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME) err = ufs_qcom_quirk_host_pa_saveconfigtime(hba); + if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH) + ufs_qcom_override_pa_tx_hsg1_sync_len(hba); + return err; } @@ -914,6 +954,10 @@ 
static struct ufs_dev_quirk ufs_qcom_dev_fixups[] = { { .wmanufacturerid = UFS_VENDOR_WDC, .model = UFS_ANY_MODEL, .quirk = UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE }, + { .wmanufacturerid = UFS_VENDOR_SAMSUNG, + .model = UFS_ANY_MODEL, + .quirk = UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH | + UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING }, {} }; @@ -1317,29 +1361,46 @@ static int ufs_qcom_set_clk_40ns_cycles(struct ufs_hba *hba, return ufshcd_dme_set(hba, UIC_ARG_MIB(PA_VS_CORE_CLK_40NS_CYCLES), reg); } -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq) +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up, unsigned long freq) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; struct ufs_clk_info *clki; u32 cycles_in_1us = 0; u32 core_clk_ctrl_reg; + unsigned long clk_freq; int err; + if (hba->use_pm_opp && freq != ULONG_MAX) { + clk_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro"); + if (clk_freq) { + cycles_in_1us = ceil(clk_freq, HZ_PER_MHZ); + goto set_core_clk_ctrl; + } + } + list_for_each_entry(clki, head, list) { if (!IS_ERR_OR_NULL(clki->clk) && !strcmp(clki->name, "core_clk_unipro")) { - if (!clki->max_freq) + if (!clki->max_freq) { cycles_in_1us = 150; /* default for backwards compatibility */ - else if (freq == ULONG_MAX) + break; + } + + if (freq == ULONG_MAX) { cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ); - else - cycles_in_1us = ceil(freq, HZ_PER_MHZ); + break; + } + if (is_scale_up) + cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ); + else + cycles_in_1us = ceil(clk_get_rate(clki->clk), HZ_PER_MHZ); break; } } +set_core_clk_ctrl: err = ufshcd_dme_get(hba, UIC_ARG_MIB(DME_VS_CORE_CLK_CTRL), &core_clk_ctrl_reg); @@ -1382,7 +1443,7 @@ static int ufs_qcom_clk_scale_up_pre_change(struct ufs_hba *hba, unsigned long f return ret; } /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, freq); + 
return ufs_qcom_set_core_clk_ctrl(hba, true, freq); } static int ufs_qcom_clk_scale_up_post_change(struct ufs_hba *hba) @@ -1414,7 +1475,7 @@ static int ufs_qcom_clk_scale_down_pre_change(struct ufs_hba *hba) static int ufs_qcom_clk_scale_down_post_change(struct ufs_hba *hba, unsigned long freq) { /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, freq); + return ufs_qcom_set_core_clk_ctrl(hba, false, freq); } static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, bool scale_up, @@ -1879,11 +1940,53 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) return ret; } +static unsigned long ufs_qcom_opp_freq_to_clk_freq(struct ufs_hba *hba, + unsigned long freq, char *name) +{ + struct ufs_clk_info *clki; + struct dev_pm_opp *opp; + unsigned long clk_freq; + int idx = 0; + bool found = false; + + opp = dev_pm_opp_find_freq_exact_indexed(hba->dev, freq, 0, true); + if (IS_ERR(opp)) { + dev_err(hba->dev, "Failed to find OPP for exact frequency %lu\n", freq); + return 0; + } + + list_for_each_entry(clki, &hba->clk_list_head, list) { + if (!strcmp(clki->name, name)) { + found = true; + break; + } + + idx++; + } + + if (!found) { + dev_err(hba->dev, "Failed to find clock '%s' in clk list\n", name); + dev_pm_opp_put(opp); + return 0; + } + + clk_freq = dev_pm_opp_get_freq_indexed(opp, idx); + + dev_pm_opp_put(opp); + + return clk_freq; +} + static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) { - u32 gear = 0; + u32 gear = UFS_HS_DONT_CHANGE; + unsigned long unipro_freq; + + if (!hba->use_pm_opp) + return gear; - switch (freq) { + unipro_freq = ufs_qcom_opp_freq_to_clk_freq(hba, freq, "core_clk_unipro"); + switch (unipro_freq) { case 403000000: gear = UFS_HS_G5; break; @@ -1903,10 +2006,10 @@ static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) break; default: dev_err(hba->dev, "%s: Unsupported clock freq : %lu\n", __func__, freq); - break; + return 
UFS_HS_DONT_CHANGE; } - return gear; + return min_t(u32, gear, hba->max_pwr_info.info.gear_rx); } /* diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h index d0e6ec9128e79d..05d4cb569c5005 100644 --- a/drivers/ufs/host/ufs-qcom.h +++ b/drivers/ufs/host/ufs-qcom.h @@ -122,8 +122,11 @@ enum { TMRLUT_HW_CGC_EN | OCSC_HW_CGC_EN) /* QUniPro Vendor specific attributes */ +#define PA_TX_HSG1_SYNC_LENGTH 0x1552 #define PA_VS_CONFIG_REG1 0x9000 #define DME_VS_CORE_CLK_CTRL 0xD002 +#define TX_HS_EQUALIZER 0x0037 + /* bit and mask definitions for DME_VS_CORE_CLK_CTRL attribute */ #define CLK_1US_CYCLES_MASK_V4 GENMASK(27, 16) #define CLK_1US_CYCLES_MASK GENMASK(7, 0) @@ -141,6 +144,21 @@ enum { #define UNIPRO_CORE_CLK_FREQ_201_5_MHZ 202 #define UNIPRO_CORE_CLK_FREQ_403_MHZ 403 +/* TX_HSG1_SYNC_LENGTH attr value */ +#define PA_TX_HSG1_SYNC_LENGTH_VAL 0x4A + +/* + * Some ufs device vendors need a different TSync length. + * Enable this quirk to give an additional TX_HS_SYNC_LENGTH. + */ +#define UFS_DEVICE_QUIRK_PA_TX_HSG1_SYNC_LENGTH BIT(16) + +/* + * Some ufs device vendors need a different Deemphasis setting. + * Enable this quirk to tune TX Deemphasis parameters. + */ +#define UFS_DEVICE_QUIRK_PA_TX_DEEMPHASIS_TUNING BIT(17) + /* ICE allocator type to share AES engines among TX stream and RX stream */ #define ICE_ALLOCATOR_TYPE 2 diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 1b19b56474950f..69c1df0f4ca541 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -131,15 +131,12 @@ static void hv_uio_rescind(struct vmbus_channel *channel) vmbus_device_unregister(channel->device_obj); } -/* Sysfs API to allow mmap of the ring buffers +/* Function used for mmap of ring buffer sysfs interface. 
* The ring buffer is allocated as contiguous memory by vmbus_open */ -static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, - const struct bin_attribute *attr, - struct vm_area_struct *vma) +static int +hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma) { - struct vmbus_channel *channel - = container_of(kobj, struct vmbus_channel, kobj); void *ring_buffer = page_address(channel->ringbuffer_page); if (channel->state != CHANNEL_OPENED_STATE) @@ -149,15 +146,6 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, channel->ringbuffer_pagecount << PAGE_SHIFT); } -static const struct bin_attribute ring_buffer_bin_attr = { - .attr = { - .name = "ring", - .mode = 0600, - }, - .size = 2 * SZ_2M, - .mmap = hv_uio_ring_mmap, -}; - /* Callback from VMBUS subsystem when new channel created. */ static void hv_uio_new_channel(struct vmbus_channel *new_sc) @@ -178,8 +166,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) /* Disable interrupts on sub channel */ new_sc->inbound.ring_buffer->interrupt_mask = 1; set_channel_read_mode(new_sc, HV_CALL_ISR); - - ret = sysfs_create_bin_file(&new_sc->kobj, &ring_buffer_bin_attr); + ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap); if (ret) { dev_err(device, "sysfs create ring bin file failed; %d\n", ret); vmbus_close(new_sc); @@ -350,10 +337,18 @@ hv_uio_probe(struct hv_device *dev, goto fail_close; } - ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr); - if (ret) - dev_notice(&dev->device, - "sysfs create ring bin file failed; %d\n", ret); + /* + * This internally calls sysfs_update_group, which returns a non-zero value if it executes + * before sysfs_create_group. This is expected as the 'ring' will be created later in + * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return + * value and print warning. + * + * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race + * conditions with userspace. 
For backward compatibility, "ring" sysfs could not be removed + * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify + * APIs to make sure that ring is created. + */ + hv_create_ring_sysfs(channel, hv_uio_ring_mmap); hv_set_drvdata(dev, pdata); @@ -375,7 +370,7 @@ hv_uio_remove(struct hv_device *dev) if (!pdata) return; - sysfs_remove_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr); + hv_remove_ring_sysfs(dev->channel); uio_unregister_device(&pdata->info); hv_uio_cleanup(dev, pdata); diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 694aa14577390b..d9d8dc05b23599 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -1963,6 +1963,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) unsigned int bit; unsigned long reg; + local_bh_disable(); spin_lock_irqsave(&priv_dev->lock, flags); reg = readl(&priv_dev->regs->usb_ists); @@ -2004,6 +2005,7 @@ static irqreturn_t cdns3_device_thread_irq_handler(int irq, void *data) irqend: writel(~0, &priv_dev->regs->ep_ien); spin_unlock_irqrestore(&priv_dev->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 87f31084173509..55f95f41b3b4dd 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -29,7 +29,8 @@ unsigned int cdnsp_port_speed(unsigned int port_status) { /*Detect gadget speed based on PORTSC register*/ - if (DEV_SUPERSPEEDPLUS(port_status)) + if (DEV_SUPERSPEEDPLUS(port_status) || + DEV_SSP_GEN1x2(port_status) || DEV_SSP_GEN2x2(port_status)) return USB_SPEED_SUPER_PLUS; else if (DEV_SUPERSPEED(port_status)) return USB_SPEED_SUPER; @@ -139,6 +140,26 @@ static void cdnsp_clear_port_change_bit(struct cdnsp_device *pdev, (portsc & PORT_CHANGE_BITS), port_regs); } +static void cdnsp_set_apb_timeout_value(struct cdnsp_device *pdev) +{ + struct cdns *cdns = dev_get_drvdata(pdev->dev); 
+ __le32 __iomem *reg; + void __iomem *base; + u32 offset = 0; + u32 val; + + if (!cdns->override_apb_timeout) + return; + + base = &pdev->cap_regs->hc_capbase; + offset = cdnsp_find_next_ext_cap(base, offset, D_XEC_PRE_REGS_CAP); + reg = base + offset + REG_CHICKEN_BITS_3_OFFSET; + + val = le32_to_cpu(readl(reg)); + val = CHICKEN_APB_TIMEOUT_SET(val, cdns->override_apb_timeout); + writel(cpu_to_le32(val), reg); +} + static void cdnsp_set_chicken_bits_2(struct cdnsp_device *pdev, u32 bit) { __le32 __iomem *reg; @@ -527,6 +548,7 @@ int cdnsp_wait_for_cmd_compl(struct cdnsp_device *pdev) dma_addr_t cmd_deq_dma; union cdnsp_trb *event; u32 cycle_state; + u32 retry = 10; int ret, val; u64 cmd_dma; u32 flags; @@ -558,8 +580,23 @@ int cdnsp_wait_for_cmd_compl(struct cdnsp_device *pdev) flags = le32_to_cpu(event->event_cmd.flags); /* Check the owner of the TRB. */ - if ((flags & TRB_CYCLE) != cycle_state) + if ((flags & TRB_CYCLE) != cycle_state) { + /* + * Give some extra time to get chance controller + * to finish command before returning error code. + * Checking CMD_RING_BUSY is not sufficient because + * this bit is cleared to '0' when the Command + * Descriptor has been executed by controller + * and not when command completion event has + * be added to event ring. 
+ */ + if (retry--) { + udelay(20); + continue; + } + return -EINVAL; + } cmd_dma = le64_to_cpu(event->event_cmd.cmd_trb); @@ -1773,6 +1810,8 @@ static void cdnsp_get_rev_cap(struct cdnsp_device *pdev) reg += cdnsp_find_next_ext_cap(reg, 0, RTL_REV_CAP); pdev->rev_cap = reg; + pdev->rtl_revision = readl(&pdev->rev_cap->rtl_revision); + dev_info(pdev->dev, "Rev: %08x/%08x, eps: %08x, buff: %08x/%08x\n", readl(&pdev->rev_cap->ctrl_revision), readl(&pdev->rev_cap->rtl_revision), @@ -1798,6 +1837,15 @@ static int cdnsp_gen_setup(struct cdnsp_device *pdev) pdev->hci_version = HC_VERSION(pdev->hcc_params); pdev->hcc_params = readl(&pdev->cap_regs->hcc_params); + /* + * Override the APB timeout value to give the controller more time for + * enabling UTMI clock and synchronizing APB and UTMI clock domains. + * This fix is platform specific and is required to fixes issue with + * reading incorrect value from PORTSC register after resuming + * from L1 state. + */ + cdnsp_set_apb_timeout_value(pdev); + cdnsp_get_rev_cap(pdev); /* Make sure the Device Controller is halted. 
*/ diff --git a/drivers/usb/cdns3/cdnsp-gadget.h b/drivers/usb/cdns3/cdnsp-gadget.h index 84887dfea7635b..2afa3e558f85ca 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.h +++ b/drivers/usb/cdns3/cdnsp-gadget.h @@ -285,11 +285,15 @@ struct cdnsp_port_regs { #define XDEV_HS (0x3 << 10) #define XDEV_SS (0x4 << 10) #define XDEV_SSP (0x5 << 10) +#define XDEV_SSP1x2 (0x6 << 10) +#define XDEV_SSP2x2 (0x7 << 10) #define DEV_UNDEFSPEED(p) (((p) & DEV_SPEED_MASK) == (0x0 << 10)) #define DEV_FULLSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_FS) #define DEV_HIGHSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_HS) #define DEV_SUPERSPEED(p) (((p) & DEV_SPEED_MASK) == XDEV_SS) #define DEV_SUPERSPEEDPLUS(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP) +#define DEV_SSP_GEN1x2(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP1x2) +#define DEV_SSP_GEN2x2(p) (((p) & DEV_SPEED_MASK) == XDEV_SSP2x2) #define DEV_SUPERSPEED_ANY(p) (((p) & DEV_SPEED_MASK) >= XDEV_SS) #define DEV_PORT_SPEED(p) (((p) >> 10) & 0x0f) /* Port Link State Write Strobe - set this when changing link state */ @@ -520,6 +524,9 @@ struct cdnsp_rev_cap { #define REG_CHICKEN_BITS_2_OFFSET 0x48 #define CHICKEN_XDMA_2_TP_CACHE_DIS BIT(28) +#define REG_CHICKEN_BITS_3_OFFSET 0x4C +#define CHICKEN_APB_TIMEOUT_SET(p, val) (((p) & ~GENMASK(21, 0)) | (val)) + /* XBUF Extended Capability ID. */ #define XBUF_CAP_ID 0xCB #define XBUF_RX_TAG_MASK_0_OFFSET 0x1C @@ -1357,6 +1364,7 @@ struct cdnsp_port { * @rev_cap: Controller Capabilities Registers. * @hcs_params1: Cached register copies of read-only HCSPARAMS1 * @hcc_params: Cached register copies of read-only HCCPARAMS1 + * @rtl_revision: Cached controller rtl revision. * @setup: Temporary buffer for setup packet. * @ep0_preq: Internal allocated request used during enumeration. * @ep0_stage: ep0 stage during enumeration process. 
@@ -1411,6 +1419,8 @@ struct cdnsp_device { __u32 hcs_params1; __u32 hcs_params3; __u32 hcc_params; + #define RTL_REVISION_NEW_LPM 0x2700 + __u32 rtl_revision; /* Lock used in interrupt thread context. */ spinlock_t lock; struct usb_ctrlrequest setup; diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c index a51144504ff337..8c361b8394e959 100644 --- a/drivers/usb/cdns3/cdnsp-pci.c +++ b/drivers/usb/cdns3/cdnsp-pci.c @@ -28,6 +28,8 @@ #define PCI_DRIVER_NAME "cdns-pci-usbssp" #define PLAT_DRIVER_NAME "cdns-usbssp" +#define CHICKEN_APB_TIMEOUT_VALUE 0x1C20 + static struct pci_dev *cdnsp_get_second_fun(struct pci_dev *pdev) { /* @@ -139,6 +141,14 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, cdnsp->otg_irq = pdev->irq; } + /* + * Cadence PCI based platform require some longer timeout for APB + * to fixes domain clock synchronization issue after resuming + * controller from L1 state. + */ + cdnsp->override_apb_timeout = CHICKEN_APB_TIMEOUT_VALUE; + pci_set_drvdata(pdev, cdnsp); + if (pci_is_enabled(func)) { cdnsp->dev = dev; cdnsp->gadget_init = cdnsp_gadget_init; @@ -148,8 +158,6 @@ static int cdnsp_pci_probe(struct pci_dev *pdev, goto free_cdnsp; } - pci_set_drvdata(pdev, cdnsp); - device_wakeup_enable(&pdev->dev); if (pci_dev_run_wake(pdev)) pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 46852529499d16..fd06cb85c4ea84 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -308,7 +308,8 @@ static bool cdnsp_ring_ep_doorbell(struct cdnsp_device *pdev, writel(db_value, reg_addr); - cdnsp_force_l0_go(pdev); + if (pdev->rtl_revision < RTL_REVISION_NEW_LPM) + cdnsp_force_l0_go(pdev); /* Doorbell was set. 
*/ return true; diff --git a/drivers/usb/cdns3/core.h b/drivers/usb/cdns3/core.h index 921cccf1ca9db2..801be9e61340e7 100644 --- a/drivers/usb/cdns3/core.h +++ b/drivers/usb/cdns3/core.h @@ -79,6 +79,8 @@ struct cdns3_platform_data { * @pdata: platform data from glue layer * @lock: spinlock structure * @xhci_plat_data: xhci private data structure pointer + * @override_apb_timeout: hold value of APB timeout. For value 0 the default + * value in CHICKEN_BITS_3 will be preserved. * @gadget_init: pointer to gadget initialization function */ struct cdns { @@ -117,6 +119,7 @@ struct cdns { struct cdns3_platform_data *pdata; spinlock_t lock; struct xhci_plat_priv *xhci_plat_data; + u32 override_apb_timeout; int (*gadget_init)(struct cdns *cdns); }; diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 1a7fc638213eba..4f8bfd242b5953 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -336,6 +336,13 @@ static int ci_hdrc_imx_notify_event(struct ci_hdrc *ci, unsigned int event) return ret; } +static void ci_hdrc_imx_disable_regulator(void *arg) +{ + struct ci_hdrc_imx_data *data = arg; + + regulator_disable(data->hsic_pad_regulator); +} + static int ci_hdrc_imx_probe(struct platform_device *pdev) { struct ci_hdrc_imx_data *data; @@ -394,6 +401,13 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) "Failed to enable HSIC pad regulator\n"); goto err_put; } + ret = devm_add_action_or_reset(dev, + ci_hdrc_imx_disable_regulator, data); + if (ret) { + dev_err(dev, + "Failed to add regulator devm action\n"); + goto err_put; + } } } @@ -432,11 +446,11 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_get_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = imx_prepare_enable_clks(dev); if (ret) - goto disable_hsic_regulator; + goto qos_remove_request; ret = clk_prepare_enable(data->clk_wakeup); if (ret) @@ -470,7 +484,11 @@ static int 
ci_hdrc_imx_probe(struct platform_device *pdev) of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) { pdata.flags |= CI_HDRC_OVERRIDE_PHY_CONTROL; data->override_phy_control = true; - usb_phy_init(pdata.usb_phy); + ret = usb_phy_init(pdata.usb_phy); + if (ret) { + dev_err(dev, "Failed to init phy\n"); + goto err_clk; + } } if (pdata.flags & CI_HDRC_SUPPORTS_RUNTIME_PM) @@ -479,7 +497,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) ret = imx_usbmisc_init(data->usbmisc_data); if (ret) { dev_err(dev, "usbmisc init failed, ret=%d\n", ret); - goto err_clk; + goto phy_shutdown; } data->ci_pdev = ci_hdrc_add_device(dev, @@ -488,7 +506,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (IS_ERR(data->ci_pdev)) { ret = PTR_ERR(data->ci_pdev); dev_err_probe(dev, ret, "ci_hdrc_add_device failed\n"); - goto err_clk; + goto phy_shutdown; } if (data->usbmisc_data) { @@ -522,19 +540,20 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); +phy_shutdown: + if (data->override_phy_control) + usb_phy_shutdown(data->phy); err_clk: clk_disable_unprepare(data->clk_wakeup); err_wakeup_clk: imx_disable_unprepare_clks(dev); -disable_hsic_regulator: - if (data->hsic_pad_regulator) - /* don't overwrite original ret (cf. 
EPROBE_DEFER) */ - regulator_disable(data->hsic_pad_regulator); +qos_remove_request: if (pdata.flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); data->ci_pdev = NULL; err_put: - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); return ret; } @@ -556,10 +575,9 @@ static void ci_hdrc_imx_remove(struct platform_device *pdev) clk_disable_unprepare(data->clk_wakeup); if (data->plat_data->flags & CI_HDRC_PMQOS) cpu_latency_qos_remove_request(&data->pm_qos_req); - if (data->hsic_pad_regulator) - regulator_disable(data->hsic_pad_regulator); } - put_device(data->usbmisc_data->dev); + if (data->usbmisc_data) + put_device(data->usbmisc_data->dev); } static void ci_hdrc_imx_shutdown(struct platform_device *pdev) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 86ee39db013f39..16e7fa4d488d37 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -726,7 +726,7 @@ static int wdm_open(struct inode *inode, struct file *file) rv = -EBUSY; goto out; } - + smp_rmb(); /* ordered against wdm_wwan_port_stop() */ rv = usb_autopm_get_interface(desc->intf); if (rv < 0) { dev_err(&desc->intf->dev, "Error autopm - %d\n", rv); @@ -829,6 +829,7 @@ static struct usb_class_driver wdm_class = { static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); + int rv; /* The interface is both exposed via the WWAN framework and as a * legacy usbmisc chardev. 
If chardev is already open, just fail @@ -848,7 +849,15 @@ static int wdm_wwan_port_start(struct wwan_port *port) wwan_port_txon(port); /* Start getting events */ - return usb_submit_urb(desc->validity, GFP_KERNEL); + rv = usb_submit_urb(desc->validity, GFP_KERNEL); + if (rv < 0) { + wwan_port_txoff(port); + desc->manage_power(desc->intf, 0); + /* this must be last lest we race with chardev open */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); + } + + return rv; } static void wdm_wwan_port_stop(struct wwan_port *port) @@ -859,8 +868,10 @@ static void wdm_wwan_port_stop(struct wwan_port *port) poison_urbs(desc); desc->manage_power(desc->intf, 0); clear_bit(WDM_READ, &desc->flags); - clear_bit(WDM_WWAN_IN_USE, &desc->flags); unpoison_urbs(desc); + smp_wmb(); /* ordered against wdm_open() */ + /* this must be last lest we open a poisoned device */ + clear_bit(WDM_WWAN_IN_USE, &desc->flags); } static void wdm_wwan_port_tx_complete(struct urb *urb) @@ -868,7 +879,7 @@ static void wdm_wwan_port_tx_complete(struct urb *urb) struct sk_buff *skb = urb->context; struct wdm_device *desc = skb_shinfo(skb)->destructor_arg; - usb_autopm_put_interface(desc->intf); + usb_autopm_put_interface_async(desc->intf); wwan_port_txon(desc->wwanp); kfree_skb(skb); } @@ -898,7 +909,7 @@ static int wdm_wwan_port_tx(struct wwan_port *port, struct sk_buff *skb) req->bRequestType = (USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE); req->bRequest = USB_CDC_SEND_ENCAPSULATED_COMMAND; req->wValue = 0; - req->wIndex = desc->inum; + req->wIndex = desc->inum; /* already converted */ req->wLength = cpu_to_le16(skb->len); skb_shinfo(skb)->destructor_arg = desc; diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 34e46ef308abfd..75de29725a450c 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -482,6 +482,8 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) u8 *buffer; u8 tag; int rv; + long wait_rv; + unsigned long expire; 
dev_dbg(dev, "Enter ioctl_read_stb iin_ep_present: %d\n", data->iin_ep_present); @@ -511,16 +513,18 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) } if (data->iin_ep_present) { - rv = wait_event_interruptible_timeout( + expire = msecs_to_jiffies(file_data->timeout); + wait_rv = wait_event_interruptible_timeout( data->waitq, atomic_read(&data->iin_data_valid) != 0, - file_data->timeout); - if (rv < 0) { - dev_dbg(dev, "wait interrupted %d\n", rv); + expire); + if (wait_rv < 0) { + dev_dbg(dev, "wait interrupted %ld\n", wait_rv); + rv = wait_rv; goto exit; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "wait timed out\n"); rv = -ETIMEDOUT; goto exit; @@ -539,6 +543,8 @@ static int usbtmc_get_stb(struct usbtmc_file_data *file_data, __u8 *stb) dev_dbg(dev, "stb:0x%02x received %d\n", (unsigned int)*stb, rv); + rv = 0; + exit: /* bump interrupt bTag */ data->iin_bTag += 1; @@ -559,14 +565,15 @@ static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data, rv = usbtmc_get_stb(file_data, &stb); - if (rv > 0) { - srq_asserted = atomic_xchg(&file_data->srq_asserted, - srq_asserted); - if (srq_asserted) - stb |= 0x40; /* Set RQS bit */ + if (rv < 0) + return rv; + + srq_asserted = atomic_xchg(&file_data->srq_asserted, srq_asserted); + if (srq_asserted) + stb |= 0x40; /* Set RQS bit */ + + rv = put_user(stb, (__u8 __user *)arg); - rv = put_user(stb, (__u8 __user *)arg); - } return rv; } @@ -602,9 +609,9 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, { struct usbtmc_device_data *data = file_data->data; struct device *dev = &data->intf->dev; - int rv; u32 timeout; unsigned long expire; + long wait_rv; if (!data->iin_ep_present) { dev_dbg(dev, "no interrupt endpoint present\n"); @@ -618,25 +625,24 @@ static int usbtmc488_ioctl_wait_srq(struct usbtmc_file_data *file_data, mutex_unlock(&data->io_mutex); - rv = wait_event_interruptible_timeout( - data->waitq, - atomic_read(&file_data->srq_asserted) != 0 || - 
atomic_read(&file_data->closing), - expire); + wait_rv = wait_event_interruptible_timeout( + data->waitq, + atomic_read(&file_data->srq_asserted) != 0 || + atomic_read(&file_data->closing), + expire); mutex_lock(&data->io_mutex); /* Note! disconnect or close could be called in the meantime */ if (atomic_read(&file_data->closing) || data->zombie) - rv = -ENODEV; + return -ENODEV; - if (rv < 0) { - /* dev can be invalid now! */ - pr_debug("%s - wait interrupted %d\n", __func__, rv); - return rv; + if (wait_rv < 0) { + dev_dbg(dev, "%s - wait interrupted %ld\n", __func__, wait_rv); + return wait_rv; } - if (rv == 0) { + if (wait_rv == 0) { dev_dbg(dev, "%s - wait timed out\n", __func__); return -ETIMEDOUT; } @@ -830,6 +836,7 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, unsigned long expire; int bufcount = 1; int again = 0; + long wait_rv; /* mutex already locked */ @@ -942,19 +949,24 @@ static ssize_t usbtmc_generic_read(struct usbtmc_file_data *file_data, if (!(flags & USBTMC_FLAG_ASYNC)) { dev_dbg(dev, "%s: before wait time %lu\n", __func__, expire); - retval = wait_event_interruptible_timeout( + wait_rv = wait_event_interruptible_timeout( file_data->wait_bulk_in, usbtmc_do_transfer(file_data), expire); - dev_dbg(dev, "%s: wait returned %d\n", - __func__, retval); + dev_dbg(dev, "%s: wait returned %ld\n", + __func__, wait_rv); + + if (wait_rv < 0) { + retval = wait_rv; + goto error; + } - if (retval <= 0) { - if (retval == 0) - retval = -ETIMEDOUT; + if (wait_rv == 0) { + retval = -ETIMEDOUT; goto error; } + } urb = usb_get_from_anchor(&file_data->in_anchor); @@ -1380,7 +1392,10 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, if (!buffer) return -ENOMEM; - mutex_lock(&data->io_mutex); + retval = mutex_lock_interruptible(&data->io_mutex); + if (retval < 0) + goto exit_nolock; + if (data->zombie) { retval = -ENODEV; goto exit; @@ -1503,6 +1518,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf, exit: 
mutex_unlock(&data->io_mutex); +exit_nolock: kfree(buffer); return retval; } @@ -2186,7 +2202,7 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case USBTMC_IOCTL_GET_STB: retval = usbtmc_get_stb(file_data, &tmp_byte); - if (retval > 0) + if (!retval) retval = put_user(tmp_byte, (__u8 __user *)arg); break; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0e1dd6ef60a719..9f19fc7494e022 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -6133,6 +6133,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) struct usb_hub *parent_hub; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_device_descriptor descriptor; + struct usb_interface *intf; struct usb_host_bos *bos; int i, j, ret = 0; int port1 = udev->portnum; @@ -6190,6 +6191,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (!udev->actconfig) goto done; + /* + * Some devices can't handle setting default altsetting 0 with a + * Set-Interface request. Disable host-side endpoints of those + * interfaces here. 
Enable and reset them back after host has set + * its internal endpoint structures during usb_hcd_alloc_bandwith() + */ + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + intf = udev->actconfig->interface[i]; + if (intf->cur_altsetting->desc.bAlternateSetting == 0) + usb_disable_interface(udev, intf, true); + } + mutex_lock(hcd->bandwidth_mutex); ret = usb_hcd_alloc_bandwidth(udev, udev->actconfig, NULL, NULL); if (ret < 0) { @@ -6221,12 +6234,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev) */ for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_host_config *config = udev->actconfig; - struct usb_interface *intf = config->interface[i]; struct usb_interface_descriptor *desc; + intf = config->interface[i]; desc = &intf->cur_altsetting->desc; if (desc->bAlternateSetting == 0) { - usb_disable_interface(udev, intf, true); usb_enable_interface(udev, intf, true); ret = 0; } else { diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8efbacc5bc3411..53d68d20fb62e0 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -369,6 +369,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* SanDisk Corp. 
SanDisk 3.2Gen1 */ + { USB_DEVICE(0x0781, 0x55a3), .driver_info = USB_QUIRK_DELAY_INIT }, + + /* SanDisk Extreme 55AE */ + { USB_DEVICE(0x0781, 0x55ae), .driver_info = USB_QUIRK_NO_LPM }, + /* Realforce 87U Keyboard */ { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, @@ -383,6 +389,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Silicon Motion Flash Drive */ + { USB_DEVICE(0x090c, 0x1000), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Sound Devices USBPre2 */ { USB_DEVICE(0x0926, 0x0202), .driver_info = USB_QUIRK_ENDPOINT_IGNORE }, @@ -539,6 +548,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* VLI disk */ + { USB_DEVICE(0x2109, 0x0711), .driver_info = USB_QUIRK_NO_LPM }, + /* Raydium Touchscreen */ { USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM }, diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index 935c0efea0b640..ea1ce8beb0cbb2 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -165,6 +165,8 @@ static int usb_acpi_add_usb4_devlink(struct usb_device *udev) return 0; hub = usb_hub_to_struct_hub(udev->parent); + if (!hub) + return 0; port_dev = hub->ports[udev->portnum - 1]; struct fwnode_handle *nhi_fwnode __free(fwnode_handle) = diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index aaa39e663f60a5..27eae4cf223dfd 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1164,6 +1164,9 @@ struct dwc3_scratchpad_array { * @gsbuscfg0_reqinfo: store GSBUSCFG0.DATRDREQINFO, DESRDREQINFO, * DATWRREQINFO, and DESWRREQINFO value passed from * glue driver. + * @wakeup_pending_funcs: Indicates whether any interface has requested for + * function wakeup in bitmap format where bit position + * represents interface_id. 
*/ struct dwc3 { struct work_struct drd_work; @@ -1394,6 +1397,7 @@ struct dwc3 { int num_ep_resized; struct dentry *debug_root; u32 gsbuscfg0_reqinfo; + u32 wakeup_pending_funcs; }; #define INCRX_BURST_MODE 0 diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index a33a42ba0249ab..4ca7f6240d07df 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -207,15 +207,13 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) skip_usb3_phy: /* ulpi reset via gpio-modepin or gpio-framework driver */ - reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); + reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(reset_gpio)) { return dev_err_probe(dev, PTR_ERR(reset_gpio), "Failed to request reset GPIO\n"); } if (reset_gpio) { - /* Toggle ulpi to reset the phy. */ - gpiod_set_value_cansleep(reset_gpio, 1); usleep_range(5000, 10000); gpiod_set_value_cansleep(reset_gpio, 0); usleep_range(5000, 10000); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 47e73c4ed62d3e..321361288935db 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -276,8 +276,6 @@ int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd, return ret; } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async); - /** * dwc3_send_gadget_ep_cmd - issue an endpoint command * @dep: the endpoint to which the command is going to be issued @@ -2359,10 +2357,8 @@ static int dwc3_gadget_get_frame(struct usb_gadget *g) return __dwc3_gadget_get_frame(dwc); } -static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) +static int __dwc3_gadget_wakeup(struct dwc3 *dwc) { - int retries; - int ret; u32 reg; @@ -2390,8 +2386,7 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) return -EINVAL; } - if (async) - dwc3_gadget_enable_linksts_evts(dwc, true); + dwc3_gadget_enable_linksts_evts(dwc, true); ret = dwc3_gadget_set_link_state(dwc, 
DWC3_LINK_STATE_RECOV); if (ret < 0) { @@ -2410,27 +2405,8 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async) /* * Since link status change events are enabled we will receive - * an U0 event when wakeup is successful. So bail out. + * an U0 event when wakeup is successful. */ - if (async) - return 0; - - /* poll until Link State changes to ON */ - retries = 20000; - - while (retries--) { - reg = dwc3_readl(dwc->regs, DWC3_DSTS); - - /* in HS, means ON */ - if (DWC3_DSTS_USBLNKST(reg) == DWC3_LINK_STATE_U0) - break; - } - - if (DWC3_DSTS_USBLNKST(reg) != DWC3_LINK_STATE_U0) { - dev_err(dwc->dev, "failed to send remote wakeup\n"); - return -EINVAL; - } - return 0; } @@ -2451,7 +2427,7 @@ static int dwc3_gadget_wakeup(struct usb_gadget *g) spin_unlock_irqrestore(&dwc->lock, flags); return -EINVAL; } - ret = __dwc3_gadget_wakeup(dwc, true); + ret = __dwc3_gadget_wakeup(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -2479,14 +2455,10 @@ static int dwc3_gadget_func_wakeup(struct usb_gadget *g, int intf_id) */ link_state = dwc3_gadget_get_link_state(dwc); if (link_state == DWC3_LINK_STATE_U3) { - ret = __dwc3_gadget_wakeup(dwc, false); - if (ret) { - spin_unlock_irqrestore(&dwc->lock, flags); - return -EINVAL; - } - dwc3_resume_gadget(dwc); - dwc->suspended = false; - dwc->link_state = DWC3_LINK_STATE_U0; + dwc->wakeup_pending_funcs |= BIT(intf_id); + ret = __dwc3_gadget_wakeup(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); + return ret; } ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, @@ -4353,6 +4325,8 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, { enum dwc3_link_state next = evtinfo & DWC3_LINK_STATE_MASK; unsigned int pwropt; + int ret; + int intf_id; /* * WORKAROUND: DWC3 < 2.50a have an issue when configured without @@ -4428,7 +4402,7 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, switch (next) { case DWC3_LINK_STATE_U0: - if (dwc->gadget->wakeup_armed) { + if 
(dwc->gadget->wakeup_armed || dwc->wakeup_pending_funcs) { dwc3_gadget_enable_linksts_evts(dwc, false); dwc3_resume_gadget(dwc); dwc->suspended = false; @@ -4451,6 +4425,18 @@ static void dwc3_gadget_linksts_change_interrupt(struct dwc3 *dwc, } dwc->link_state = next; + + /* Proceed with func wakeup if any interfaces that has requested */ + while (dwc->wakeup_pending_funcs && (next == DWC3_LINK_STATE_U0)) { + intf_id = ffs(dwc->wakeup_pending_funcs) - 1; + ret = dwc3_send_gadget_generic_command(dwc, DWC3_DGCMD_DEV_NOTIFICATION, + DWC3_DGCMDPAR_DN_FUNC_WAKE | + DWC3_DGCMDPAR_INTF_SEL(intf_id)); + if (ret) + dev_err(dwc->dev, "Failed to send DN wake for intf %d\n", intf_id); + + dwc->wakeup_pending_funcs &= ~BIT(intf_id); + } } static void dwc3_gadget_suspend_interrupt(struct dwc3 *dwc, @@ -4617,6 +4603,12 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt) if (!count) return IRQ_NONE; + if (count > evt->length) { + dev_err_ratelimited(dwc->dev, "invalid count(%u) > evt->length(%u)\n", + count, evt->length); + return IRQ_NONE; + } + evt->count = count; evt->flags |= DWC3_EVENT_PENDING; diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 869ad99afb48bb..8dbc132a505e39 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2011,15 +2011,13 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) if (f->get_status) { status = f->get_status(f); + if (status < 0) break; - } else { - /* Set D0 and D1 bits based on func wakeup capability */ - if (f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP) { - status |= USB_INTRF_STAT_FUNC_RW_CAP; - if (f->func_wakeup_armed) - status |= USB_INTRF_STAT_FUNC_RW; - } + + /* if D5 is not set, then device is not wakeup capable */ + if (!(f->config->bmAttributes & USB_CONFIG_ATT_WAKEUP)) + status &= ~(USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW); } put_unaligned_le16(status & 0x0000ffff, req->buf); diff --git 
a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 80841de845b091..027226325039f0 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -892,6 +892,12 @@ static void ecm_resume(struct usb_function *f) gether_resume(&ecm->port); } +static int ecm_get_status(struct usb_function *f) +{ + return (f->func_wakeup_armed ? USB_INTRF_STAT_FUNC_RW : 0) | + USB_INTRF_STAT_FUNC_RW_CAP; +} + static void ecm_free(struct usb_function *f) { struct f_ecm *ecm; @@ -960,6 +966,7 @@ static struct usb_function *ecm_alloc(struct usb_function_instance *fi) ecm->port.func.disable = ecm_disable; ecm->port.func.free_func = ecm_free; ecm->port.func.suspend = ecm_suspend; + ecm->port.func.get_status = ecm_get_status; ecm->port.func.resume = ecm_resume; return &ecm->port.func; diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 740311c4fa2496..c7a05f842745bc 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -144,8 +144,8 @@ static struct hid_descriptor hidg_desc = { .bcdHID = cpu_to_le16(0x0101), .bCountryCode = 0x00, .bNumDescriptors = 0x1, - /*.desc[0].bDescriptorType = DYNAMIC */ - /*.desc[0].wDescriptorLenght = DYNAMIC */ + /*.rpt_desc.bDescriptorType = DYNAMIC */ + /*.rpt_desc.wDescriptorLength = DYNAMIC */ }; /* Super-Speed Support */ @@ -939,8 +939,8 @@ static int hidg_setup(struct usb_function *f, struct hid_descriptor hidg_desc_copy = hidg_desc; VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n"); - hidg_desc_copy.desc[0].bDescriptorType = HID_DT_REPORT; - hidg_desc_copy.desc[0].wDescriptorLength = + hidg_desc_copy.rpt_desc.bDescriptorType = HID_DT_REPORT; + hidg_desc_copy.rpt_desc.wDescriptorLength = cpu_to_le16(hidg->report_desc_length); length = min_t(unsigned short, length, @@ -1210,8 +1210,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f) * We can use hidg_desc struct here but we should not relay * that 
its content won't change after returning from this function. */ - hidg_desc.desc[0].bDescriptorType = HID_DT_REPORT; - hidg_desc.desc[0].wDescriptorLength = + hidg_desc.rpt_desc.bDescriptorType = HID_DT_REPORT; + hidg_desc.rpt_desc.wDescriptorLength = cpu_to_le16(hidg->report_desc_length); hidg_hs_in_ep_desc.bEndpointAddress = diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 12e866fb311d63..0a800ba53816a8 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -475,7 +475,7 @@ static void reply_ump_stream_ep_info(struct f_midi2_ep *ep) /* reply a UMP EP device info */ static void reply_ump_stream_ep_device(struct f_midi2_ep *ep) { - struct snd_ump_stream_msg_devince_info rep = { + struct snd_ump_stream_msg_device_info rep = { .type = UMP_MSG_TYPE_STREAM, .status = UMP_STREAM_MSG_STATUS_DEVICE_INFO, .manufacture_id = ep->info.manufacturer, diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 4b3d5075621aa0..d709e24c1fd422 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1570,7 +1570,7 @@ static int gadget_match_driver(struct device *dev, const struct device_driver *d { struct usb_gadget *gadget = dev_to_usb_gadget(dev); struct usb_udc *udc = gadget->udc; - struct usb_gadget_driver *driver = container_of(drv, + const struct usb_gadget_driver *driver = container_of(drv, struct usb_gadget_driver, driver); /* If the driver specifies a udc_name, it must match the UDC's name */ diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index c7fdbc55fb0b97..2957316fd3d003 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -1749,6 +1749,10 @@ static int __tegra_xudc_ep_disable(struct tegra_xudc_ep *ep) val = xudc_readl(xudc, CTRL); val &= ~CTRL_RUN; xudc_writel(xudc, val, CTRL); + + val = xudc_readl(xudc, ST); + if (val & ST_RC) + xudc_writel(xudc, 
ST_RC, ST); } dev_info(xudc->dev, "ep %u disabled\n", ep->index); diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 900ea0d368e034..9f0a6b27e47cb6 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -165,6 +165,25 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) return 0; } +static int ohci_quirk_loongson(struct usb_hcd *hcd) +{ + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + /* + * Loongson's LS7A OHCI controller (rev 0x02) has a + * flaw. MMIO register with offset 0x60/64 is treated + * as legacy PS2-compatible keyboard/mouse interface. + * Since OHCI only use 4KB BAR resource, LS7A OHCI's + * 32KB BAR is wrapped around (the 2nd 4KB BAR space + * is the same as the 1st 4KB internally). So add 4KB + * offset (0x1000) to the OHCI registers as a quirk. + */ + if (pdev->revision == 0x2) + hcd->regs += SZ_4K; /* SZ_4K = 0x1000 */ + + return 0; +} + static int ohci_quirk_qemu(struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci(hcd); @@ -224,6 +243,10 @@ static const struct pci_device_id ohci_pci_quirks[] = { PCI_DEVICE(PCI_VENDOR_ID_ATI, 0x4399), .driver_data = (unsigned long)ohci_quirk_amd700, }, + { + PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, 0x7a24), + .driver_data = (unsigned long)ohci_quirk_loongson, + }, { .vendor = PCI_VENDOR_ID_APPLE, .device = 0x003f, diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index a7c934404ebc7e..62318291f5664c 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -121,7 +121,7 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) } /* Get and enable clock if any specified */ - uhci->clk = devm_clk_get(&pdev->dev, NULL); + uhci->clk = devm_clk_get_optional(&pdev->dev, NULL); if (IS_ERR(uhci->clk)) { ret = PTR_ERR(uhci->clk); goto err_rmr; diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index fd7895b24367db..0d4ce5734165ed 100644 --- 
a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -823,6 +823,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) { dma_addr_t deq; union xhci_trb *evt; + enum evtreturn ret = EVT_DONE; u32 ctrl, portsc; bool update_erdp = false; @@ -909,6 +910,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); + ret = EVT_XFER_DONE; break; default: break; @@ -927,7 +929,7 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) lo_hi_writeq(deq, &dbc->regs->erdp); } - return EVT_DONE; + return ret; } static void xhci_dbc_handle_events(struct work_struct *work) @@ -936,6 +938,7 @@ static void xhci_dbc_handle_events(struct work_struct *work) struct xhci_dbc *dbc; unsigned long flags; unsigned int poll_interval; + unsigned long busypoll_timelimit; dbc = container_of(to_delayed_work(work), struct xhci_dbc, event_work); poll_interval = dbc->poll_interval; @@ -954,11 +957,21 @@ static void xhci_dbc_handle_events(struct work_struct *work) dbc->driver->disconnect(dbc); break; case EVT_DONE: - /* set fast poll rate if there are pending data transfers */ + /* + * Set fast poll rate if there are pending out transfers, or + * a transfer was recently processed + */ + busypoll_timelimit = dbc->xfer_timestamp + + msecs_to_jiffies(DBC_XFER_INACTIVITY_TIMEOUT); + if (!list_empty(&dbc->eps[BULK_OUT].list_pending) || - !list_empty(&dbc->eps[BULK_IN].list_pending)) + time_is_after_jiffies(busypoll_timelimit)) poll_interval = 0; break; + case EVT_XFER_DONE: + dbc->xfer_timestamp = jiffies; + poll_interval = 0; + break; default: dev_info(dbc->dev, "stop handling dbc events\n"); return; diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h index 9dc8f4d8077cc4..47ac72c2286d9a 100644 --- a/drivers/usb/host/xhci-dbgcap.h +++ b/drivers/usb/host/xhci-dbgcap.h @@ -96,6 +96,7 @@ struct dbc_ep { #define DBC_WRITE_BUF_SIZE 8192 #define 
DBC_POLL_INTERVAL_DEFAULT 64 /* milliseconds */ #define DBC_POLL_INTERVAL_MAX 5000 /* milliseconds */ +#define DBC_XFER_INACTIVITY_TIMEOUT 10 /* milliseconds */ /* * Private structure for DbC hardware state: */ @@ -142,6 +143,7 @@ struct xhci_dbc { enum dbc_state state; struct delayed_work event_work; unsigned int poll_interval; /* ms */ + unsigned long xfer_timestamp; unsigned resume_required:1; struct dbc_ep eps[2]; @@ -187,6 +189,7 @@ struct dbc_request { enum evtreturn { EVT_ERR = -1, EVT_DONE, + EVT_XFER_DONE, EVT_GSER, EVT_DISC, }; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c0f226584a408d..486347776cb29a 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1878,9 +1878,10 @@ int xhci_bus_resume(struct usb_hcd *hcd) int max_ports, port_index; int sret; u32 next_state; - u32 temp, portsc; + u32 portsc; struct xhci_hub *rhub; struct xhci_port **ports; + bool disabled_irq = false; rhub = xhci_get_rhub(hcd); ports = rhub->ports; @@ -1896,17 +1897,20 @@ int xhci_bus_resume(struct usb_hcd *hcd) return -ESHUTDOWN; } - /* delay the irqs */ - temp = readl(&xhci->op_regs->command); - temp &= ~CMD_EIE; - writel(temp, &xhci->op_regs->command); - /* bus specific resume for ports we suspended at bus_suspend */ - if (hcd->speed >= HCD_USB3) + if (hcd->speed >= HCD_USB3) { next_state = XDEV_U0; - else + } else { next_state = XDEV_RESUME; - + if (bus_state->bus_suspended) { + /* + * prevent port event interrupts from interfering + * with usb2 port resume process + */ + xhci_disable_interrupter(xhci->interrupters[0]); + disabled_irq = true; + } + } port_index = max_ports; while (port_index--) { portsc = readl(ports[port_index]->addr); @@ -1974,11 +1978,9 @@ int xhci_bus_resume(struct usb_hcd *hcd) (void) readl(&xhci->op_regs->command); bus_state->next_statechange = jiffies + msecs_to_jiffies(5); - /* re-enable irqs */ - temp = readl(&xhci->op_regs->command); - temp |= CMD_EIE; - writel(temp, &xhci->op_regs->command); - 
temp = readl(&xhci->op_regs->command); + /* re-enable interrupter */ + if (disabled_irq) + xhci_enable_interrupter(xhci->interrupters[0]); spin_unlock_irqrestore(&xhci->lock, flags); return 0; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5d64c297721cd9..423bf36495705e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -561,8 +561,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, * pointer command pending because the device can choose to start any * stream once the endpoint is on the HW schedule. */ - if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED | - EP_CLEARING_TT | EP_STALLED)) + if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) || + (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT)) return; trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id)); @@ -699,7 +699,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, int new_cycle; dma_addr_t addr; u64 hw_dequeue; - bool cycle_found = false; + bool hw_dequeue_found = false; bool td_last_trb_found = false; u32 trb_sct = 0; int ret; @@ -715,25 +715,24 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id); new_seg = ep_ring->deq_seg; new_deq = ep_ring->dequeue; - new_cycle = hw_dequeue & 0x1; + new_cycle = le32_to_cpu(td->end_trb->generic.field[3]) & TRB_CYCLE; /* - * We want to find the pointer, segment and cycle state of the new trb - * (the one after current TD's end_trb). We know the cycle state at - * hw_dequeue, so walk the ring until both hw_dequeue and end_trb are - * found. + * Walk the ring until both the next TRB and hw_dequeue are found (don't + * move hw_dequeue back if it went forward due to a HW bug). Cycle state + * is loaded from a known good TRB, track later toggles to maintain it. 
*/ do { - if (!cycle_found && xhci_trb_virt_to_dma(new_seg, new_deq) + if (!hw_dequeue_found && xhci_trb_virt_to_dma(new_seg, new_deq) == (dma_addr_t)(hw_dequeue & ~0xf)) { - cycle_found = true; + hw_dequeue_found = true; if (td_last_trb_found) break; } if (new_deq == td->end_trb) td_last_trb_found = true; - if (cycle_found && trb_is_link(new_deq) && + if (td_last_trb_found && trb_is_link(new_deq) && link_trb_toggles_cycle(new_deq)) new_cycle ^= 0x1; @@ -745,7 +744,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, return -EINVAL; } - } while (!cycle_found || !td_last_trb_found); + } while (!hw_dequeue_found || !td_last_trb_found); /* Don't update the ring cycle state for the producer (us). */ addr = xhci_trb_virt_to_dma(new_seg, new_deq); @@ -2573,9 +2572,6 @@ static void process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET); return; - case COMP_STALL_ERROR: - ep->ep_state |= EP_STALLED; - break; default: /* do nothing */ break; @@ -2916,7 +2912,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (xhci_spurious_success_tx_event(xhci, ep_ring)) { xhci_dbg(xhci, "Spurious event dma %pad, comp_code %u after %u\n", &ep_trb_dma, trb_comp_code, ep_ring->old_trb_comp_code); - ep_ring->old_trb_comp_code = trb_comp_code; + ep_ring->old_trb_comp_code = 0; return 0; } @@ -3780,7 +3776,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, * enqueue a No Op TRB, this can prevent the Setup and Data Stage * TRB to be breaked by the Link TRB. 
*/ - if (trb_is_link(ep_ring->enqueue + 1)) { + if (last_trb_on_seg(ep_ring->enq_seg, ep_ring->enqueue + 1)) { field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state; queue_trb(xhci, ep_ring, false, 0, 0, TRB_INTR_TARGET(0), field); diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index b5c362c2051d70..0c7af44d4dae50 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1364,6 +1364,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra->otg_usb3_port = tegra_xusb_padctl_get_usb3_companion(tegra->padctl, tegra->otg_usb2_port); + pm_runtime_get_sync(tegra->dev); if (tegra->host_mode) { /* switch to host mode */ if (tegra->otg_usb3_port >= 0) { @@ -1393,6 +1394,7 @@ static void tegra_xhci_id_work(struct work_struct *work) } tegra_xhci_set_port_power(tegra, true, true); + pm_runtime_mark_last_busy(tegra->dev); } else { if (tegra->otg_usb3_port >= 0) @@ -1400,6 +1402,7 @@ static void tegra_xhci_id_work(struct work_struct *work) tegra_xhci_set_port_power(tegra, true, false); } + pm_runtime_put_autosuspend(tegra->dev); } #if IS_ENABLED(CONFIG_PM) || IS_ENABLED(CONFIG_PM_SLEEP) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0452b8d65832b9..90eb491267b584 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -322,7 +322,7 @@ static void xhci_zero_64b_regs(struct xhci_hcd *xhci) xhci_info(xhci, "Fault detected\n"); } -static int xhci_enable_interrupter(struct xhci_interrupter *ir) +int xhci_enable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -335,7 +335,7 @@ static int xhci_enable_interrupter(struct xhci_interrupter *ir) return 0; } -static int xhci_disable_interrupter(struct xhci_interrupter *ir) +int xhci_disable_interrupter(struct xhci_interrupter *ir) { u32 iman; @@ -1605,11 +1605,6 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto free_priv; } - /* Class driver might not be aware ep halted due to async URB giveback */ 
- if (*ep_state & EP_STALLED) - dev_dbg(&urb->dev->dev, "URB %p queued before clearing halt\n", - urb); - switch (usb_endpoint_type(&urb->ep->desc)) { case USB_ENDPOINT_XFER_CONTROL: @@ -1770,8 +1765,8 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) goto done; } - /* In these cases no commands are pending but the endpoint is stopped */ - if (ep->ep_state & (EP_CLEARING_TT | EP_STALLED)) { + /* In this case no commands are pending but the endpoint is stopped */ + if (ep->ep_state & EP_CLEARING_TT) { /* and cancelled TDs can be given back right away */ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n", urb->dev->slot_id, ep_index, ep->ep_state); @@ -3209,11 +3204,8 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, ep = &vdev->eps[ep_index]; - spin_lock_irqsave(&xhci->lock, flags); - - ep->ep_state &= ~EP_STALLED; - /* Bail out if toggle is already being cleared by a endpoint reset */ + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; spin_unlock_irqrestore(&xhci->lock, flags); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 37860f1e3abac4..242ab9fbc8ae6b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -664,7 +664,7 @@ struct xhci_virt_ep { unsigned int err_count; unsigned int ep_state; #define SET_DEQ_PENDING (1 << 0) -#define EP_HALTED (1 << 1) /* Halted host ep handling */ +#define EP_HALTED (1 << 1) /* For stall handling */ #define EP_STOP_CMD_PENDING (1 << 2) /* For URB cancellation */ /* Transitioning the endpoint to using streams, don't enqueue URBs */ #define EP_GETTING_STREAMS (1 << 3) @@ -675,7 +675,6 @@ struct xhci_virt_ep { #define EP_SOFT_CLEAR_TOGGLE (1 << 7) /* usb_hub_clear_tt_buffer is in progress */ #define EP_CLEARING_TT (1 << 8) -#define EP_STALLED (1 << 9) /* For stall handling */ /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; struct 
xhci_hcd *xhci; @@ -1891,6 +1890,8 @@ int xhci_alloc_tt_info(struct xhci_hcd *xhci, struct usb_tt *tt, gfp_t mem_flags); int xhci_set_interrupter_moderation(struct xhci_interrupter *ir, u32 imod_interval); +int xhci_enable_interrupter(struct xhci_interrupter *ir); +int xhci_disable_interrupter(struct xhci_interrupter *ir); /* xHCI ring, segment, TRB, and TD functions */ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb); diff --git a/drivers/usb/misc/onboard_usb_dev.c b/drivers/usb/misc/onboard_usb_dev.c index 75ac3c6aa92d0d..86f25bcb64253c 100644 --- a/drivers/usb/misc/onboard_usb_dev.c +++ b/drivers/usb/misc/onboard_usb_dev.c @@ -36,9 +36,10 @@ #define USB5744_CMD_CREG_ACCESS 0x99 #define USB5744_CMD_CREG_ACCESS_LSB 0x37 #define USB5744_CREG_MEM_ADDR 0x00 +#define USB5744_CREG_MEM_RD_ADDR 0x04 #define USB5744_CREG_WRITE 0x00 -#define USB5744_CREG_RUNTIMEFLAGS2 0x41 -#define USB5744_CREG_RUNTIMEFLAGS2_LSB 0x1D +#define USB5744_CREG_READ 0x01 +#define USB5744_CREG_RUNTIMEFLAGS2 0x411D #define USB5744_CREG_BYPASS_UDC_SUSPEND BIT(3) static void onboard_dev_attach_usb_driver(struct work_struct *work); @@ -309,11 +310,88 @@ static void onboard_dev_attach_usb_driver(struct work_struct *work) pr_err("Failed to attach USB driver: %pe\n", ERR_PTR(err)); } +#if IS_ENABLED(CONFIG_USB_ONBOARD_DEV_USB5744) +static int onboard_dev_5744_i2c_read_byte(struct i2c_client *client, u16 addr, u8 *data) +{ + struct i2c_msg msg[2]; + u8 rd_buf[3]; + int ret; + + u8 wr_buf[7] = {0, USB5744_CREG_MEM_ADDR, 4, + USB5744_CREG_READ, 1, + addr >> 8 & 0xff, + addr & 0xff}; + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = sizeof(wr_buf); + msg[0].buf = wr_buf; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + wr_buf[0] = USB5744_CMD_CREG_ACCESS; + wr_buf[1] = USB5744_CMD_CREG_ACCESS_LSB; + wr_buf[2] = 0; + msg[0].len = 3; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + wr_buf[0] = 0; + 
wr_buf[1] = USB5744_CREG_MEM_RD_ADDR; + msg[0].len = 2; + + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].len = 2; + msg[1].buf = rd_buf; + + ret = i2c_transfer(client->adapter, msg, 2); + if (ret < 0) + return ret; + *data = rd_buf[1]; + + return 0; +} + +static int onboard_dev_5744_i2c_write_byte(struct i2c_client *client, u16 addr, u8 data) +{ + struct i2c_msg msg[2]; + int ret; + + u8 wr_buf[8] = {0, USB5744_CREG_MEM_ADDR, 5, + USB5744_CREG_WRITE, 1, + addr >> 8 & 0xff, + addr & 0xff, + data}; + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = sizeof(wr_buf); + msg[0].buf = wr_buf; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + msg[0].len = 3; + wr_buf[0] = USB5744_CMD_CREG_ACCESS; + wr_buf[1] = USB5744_CMD_CREG_ACCESS_LSB; + wr_buf[2] = 0; + + ret = i2c_transfer(client->adapter, msg, 1); + if (ret < 0) + return ret; + + return 0; +} + static int onboard_dev_5744_i2c_init(struct i2c_client *client) { -#if IS_ENABLED(CONFIG_USB_ONBOARD_DEV_USB5744) struct device *dev = &client->dev; int ret; + u8 reg; /* * Set BYPASS_UDC_SUSPEND bit to ensure MCU is always enabled @@ -321,20 +399,16 @@ static int onboard_dev_5744_i2c_init(struct i2c_client *client) * The command writes 5 bytes to memory and single data byte in * configuration register. 
*/ - char wr_buf[7] = {USB5744_CREG_MEM_ADDR, 5, - USB5744_CREG_WRITE, 1, - USB5744_CREG_RUNTIMEFLAGS2, - USB5744_CREG_RUNTIMEFLAGS2_LSB, - USB5744_CREG_BYPASS_UDC_SUSPEND}; - - ret = i2c_smbus_write_block_data(client, 0, sizeof(wr_buf), wr_buf); + ret = onboard_dev_5744_i2c_read_byte(client, + USB5744_CREG_RUNTIMEFLAGS2, &reg); if (ret) - return dev_err_probe(dev, ret, "BYPASS_UDC_SUSPEND bit configuration failed\n"); + return dev_err_probe(dev, ret, "CREG_RUNTIMEFLAGS2 read failed\n"); - ret = i2c_smbus_write_word_data(client, USB5744_CMD_CREG_ACCESS, - USB5744_CMD_CREG_ACCESS_LSB); + reg |= USB5744_CREG_BYPASS_UDC_SUSPEND; + ret = onboard_dev_5744_i2c_write_byte(client, + USB5744_CREG_RUNTIMEFLAGS2, reg); if (ret) - return dev_err_probe(dev, ret, "Configuration Register Access Command failed\n"); + return dev_err_probe(dev, ret, "BYPASS_UDC_SUSPEND bit configuration failed\n"); /* Send SMBus command to boot hub. */ ret = i2c_smbus_write_word_data(client, USB5744_CMD_ATTACH, @@ -343,10 +417,13 @@ static int onboard_dev_5744_i2c_init(struct i2c_client *client) return dev_err_probe(dev, ret, "USB Attach with SMBus command failed\n"); return ret; +} #else +static int onboard_dev_5744_i2c_init(struct i2c_client *client) +{ return -ENODEV; -#endif } +#endif static int onboard_dev_probe(struct platform_device *pdev) { @@ -569,8 +646,14 @@ static void onboard_dev_usbdev_disconnect(struct usb_device *udev) } static const struct usb_device_id onboard_dev_id_table[] = { - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB33{0,1,2}x/CYUSB230x 3.0 HUB */ - { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /* CYUSB33{0,1,2}x/CYUSB230x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6500) }, /* CYUSB330x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6502) }, /* CYUSB330x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6503) }, /* CYUSB33{0,1}x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6504) }, /* CYUSB331x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6506) }, /*
CYUSB331x 2.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6507) }, /* CYUSB332x 2.0 HUB, Vendor Mode */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6508) }, /* CYUSB332x 3.0 HUB */ + { USB_DEVICE(VENDOR_ID_CYPRESS, 0x650a) }, /* CYUSB332x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_CYPRESS, 0x6570) }, /* CY7C6563x 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 HUB */ { USB_DEVICE(VENDOR_ID_GENESYS, 0x0610) }, /* Genesys Logic GL852G USB 2.0 HUB */ diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 4b35ef216125c7..16692e72b73650 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -685,10 +685,29 @@ static int usbhs_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&priv->notify_hotplug_work, usbhsc_notify_hotplug); spin_lock_init(usbhs_priv_to_lock(priv)); + /* + * Acquire clocks and enable power management (PM) early in the + * probe process, as the driver accesses registers during + * initialization. Ensure the device is active before proceeding. + */ + pm_runtime_enable(dev); + + ret = usbhsc_clk_get(dev, priv); + if (ret) + goto probe_pm_disable; + + ret = pm_runtime_resume_and_get(dev); + if (ret) + goto probe_clk_put; + + ret = usbhsc_clk_prepare_enable(priv); + if (ret) + goto probe_pm_put; + /* call pipe and module init */ ret = usbhs_pipe_probe(priv); if (ret < 0) - return ret; + goto probe_clk_dis_unprepare; ret = usbhs_fifo_probe(priv); if (ret < 0) @@ -705,10 +724,6 @@ static int usbhs_probe(struct platform_device *pdev) if (ret) goto probe_fail_rst; - ret = usbhsc_clk_get(dev, priv); - if (ret) - goto probe_fail_clks; - /* * deviece reset here because * USB device might be used in boot loader. 
@@ -721,7 +736,7 @@ static int usbhs_probe(struct platform_device *pdev) if (ret) { dev_warn(dev, "USB function not selected (GPIO)\n"); ret = -ENOTSUPP; - goto probe_end_mod_exit; + goto probe_assert_rest; } } @@ -735,14 +750,19 @@ static int usbhs_probe(struct platform_device *pdev) ret = usbhs_platform_call(priv, hardware_init, pdev); if (ret < 0) { dev_err(dev, "platform init failed.\n"); - goto probe_end_mod_exit; + goto probe_assert_rest; } /* reset phy for connection */ usbhs_platform_call(priv, phy_reset, pdev); - /* power control */ - pm_runtime_enable(dev); + /* + * Disable the clocks that were enabled earlier in the probe path, + * and let the driver handle the clocks beyond this point. + */ + usbhsc_clk_disable_unprepare(priv); + pm_runtime_put(dev); + if (!usbhs_get_dparam(priv, runtime_pwctrl)) { usbhsc_power_ctrl(priv, 1); usbhs_mod_autonomy_mode(priv); @@ -759,9 +779,7 @@ static int usbhs_probe(struct platform_device *pdev) return ret; -probe_end_mod_exit: - usbhsc_clk_put(priv); -probe_fail_clks: +probe_assert_rest: reset_control_assert(priv->rsts); probe_fail_rst: usbhs_mod_remove(priv); @@ -769,6 +787,14 @@ static int usbhs_probe(struct platform_device *pdev) usbhs_fifo_remove(priv); probe_end_pipe_exit: usbhs_pipe_remove(priv); +probe_clk_dis_unprepare: + usbhsc_clk_disable_unprepare(priv); +probe_pm_put: + pm_runtime_put(dev); +probe_clk_put: + usbhsc_clk_put(priv); +probe_pm_disable: + pm_runtime_disable(dev); dev_info(dev, "probe failed (%d)\n", ret); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9b34e23b70919f..6ac7a0a5cf074e 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1093,6 +1093,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 1) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 2) }, { USB_DEVICE_INTERFACE_NUMBER(ALTERA_VID, ALTERA_UB3_602E_PID, 3) }, + /* Abacus Electrics 
*/ + { USB_DEVICE(FTDI_VID, ABACUS_OPTICAL_PROBE_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 52be47d684ea66..9acb6f83732763 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -442,6 +442,11 @@ #define LINX_FUTURE_1_PID 0xF44B /* Linx future device */ #define LINX_FUTURE_2_PID 0xF44C /* Linx future device */ +/* + * Abacus Electrics + */ +#define ABACUS_OPTICAL_PROBE_PID 0xf458 /* ABACUS ELECTRICS Optical Probe */ + /* * Oceanic product ids */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5cd26dac2069fa..27879cc575365c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -611,6 +611,7 @@ static void option_instat_callback(struct urb *urb); /* Sierra Wireless products */ #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 +#define SIERRA_PRODUCT_EM9291 0x90e3 /* UNISOC (Spreadtrum) products */ #define UNISOC_VENDOR_ID 0x1782 @@ -2432,6 +2433,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 010688dd9e49ce..22579d0d8ab8aa 100644 --- a/drivers/usb/serial/pl2303.c +++ 
b/drivers/usb/serial/pl2303.c @@ -458,6 +458,8 @@ static int pl2303_detect_type(struct usb_serial *serial) case 0x605: case 0x700: /* GR */ case 0x705: + case 0x905: /* GT-2AB */ + case 0x1005: /* GC-Q20 */ return TYPE_HXN; } break; diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 2c12449ff60c51..a0afaf254d1229 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -100,6 +100,11 @@ DEVICE(nokia, NOKIA_IDS); { USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */ DEVICE_N(novatel_gps, NOVATEL_IDS, 3); +/* OWON electronic test and measurement equipment driver */ +#define OWON_IDS() \ + { USB_DEVICE(0x5345, 0x1234) } /* HDS200 oscilloscopes and others */ +DEVICE(owon, OWON_IDS); + /* Siemens USB/MPI adapter */ #define SIEMENS_IDS() \ { USB_DEVICE(0x908, 0x0004) } @@ -134,6 +139,7 @@ static struct usb_serial_driver * const serial_drivers[] = { &motorola_tetra_device, &nokia_device, &novatel_gps_device, + &owon_device, &siemens_mpi_device, &suunto_device, &vivopay_device, @@ -153,6 +159,7 @@ static const struct usb_device_id id_table[] = { MOTOROLA_TETRA_IDS(), NOKIA_IDS(), NOVATEL_IDS(), + OWON_IDS(), SIEMENS_IDS(), SUUNTO_IDS(), VIVOPAY_IDS(), diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 1f8c9b16a0fb85..1477e31d776327 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -52,6 +52,13 @@ UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Zhihong Zhou */ +UNUSUAL_DEV(0x0781, 0x55e8, 0x0000, 0x9999, + "SanDisk", + "", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* Reported-by: Hongling Zeng */ UNUSUAL_DEV(0x090c, 0x2000, 0x0000, 0x9999, "Hiksemi", @@ -83,6 +90,13 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), +/* Reported-by: Oliver 
Neukum */ +UNUSUAL_DEV(0x125f, 0xa94a, 0x0160, 0x0160, + "ADATA", + "Portable HDD CH94", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Benjamin Tissoires */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c index ae90688d23e400..a884cec9ab7e88 100644 --- a/drivers/usb/typec/bus.c +++ b/drivers/usb/typec/bus.c @@ -449,7 +449,7 @@ ATTRIBUTE_GROUPS(typec); static int typec_match(struct device *dev, const struct device_driver *driver) { - struct typec_altmode_driver *drv = to_altmode_driver(driver); + const struct typec_altmode_driver *drv = to_altmode_driver(driver); struct typec_altmode *altmode = to_typec_altmode(dev); const struct typec_device_id *id; diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9c76c3d0c6cff9..67a533e3515064 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -1052,9 +1052,11 @@ struct typec_partner *typec_register_partner(struct typec_port *port, partner->usb_mode = USB_MODE_USB3; } + mutex_lock(&port->partner_link_lock); ret = device_register(&partner->dev); if (ret) { dev_err(&port->dev, "failed to register partner (%d)\n", ret); + mutex_unlock(&port->partner_link_lock); put_device(&partner->dev); return ERR_PTR(ret); } @@ -1063,6 +1065,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port, typec_partner_link_device(partner, port->usb2_dev); if (port->usb3_dev) typec_partner_link_device(partner, port->usb3_dev); + mutex_unlock(&port->partner_link_lock); return partner; } @@ -1083,12 +1086,18 @@ void typec_unregister_partner(struct typec_partner *partner) port = to_typec_port(partner->dev.parent); - if (port->usb2_dev) + mutex_lock(&port->partner_link_lock); + if (port->usb2_dev) { typec_partner_unlink_device(partner, port->usb2_dev); - if (port->usb3_dev) + port->usb2_dev = NULL; + } + if (port->usb3_dev) { typec_partner_unlink_device(partner, port->usb3_dev); + 
port->usb3_dev = NULL; + } device_unregister(&partner->dev); + mutex_unlock(&port->partner_link_lock); } EXPORT_SYMBOL_GPL(typec_unregister_partner); @@ -2041,10 +2050,11 @@ static struct typec_partner *typec_get_partner(struct typec_port *port) static void typec_partner_attach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; struct usb_device *udev = to_usb_device(dev); enum usb_mode usb_mode; + mutex_lock(&port->partner_link_lock); if (udev->speed < USB_SPEED_SUPER) { usb_mode = USB_MODE_USB2; port->usb2_dev = dev; @@ -2053,18 +2063,22 @@ static void typec_partner_attach(struct typec_connector *con, struct device *dev port->usb3_dev = dev; } + partner = typec_get_partner(port); if (partner) { typec_partner_set_usb_mode(partner, usb_mode); typec_partner_link_device(partner, dev); put_device(&partner->dev); } + mutex_unlock(&port->partner_link_lock); } static void typec_partner_deattach(struct typec_connector *con, struct device *dev) { struct typec_port *port = container_of(con, struct typec_port, con); - struct typec_partner *partner = typec_get_partner(port); + struct typec_partner *partner; + mutex_lock(&port->partner_link_lock); + partner = typec_get_partner(port); if (partner) { typec_partner_unlink_device(partner, dev); put_device(&partner->dev); @@ -2074,6 +2088,7 @@ static void typec_partner_deattach(struct typec_connector *con, struct device *d port->usb2_dev = NULL; else if (port->usb3_dev == dev) port->usb3_dev = NULL; + mutex_unlock(&port->partner_link_lock); } /** @@ -2614,6 +2629,7 @@ struct typec_port *typec_register_port(struct device *parent, ida_init(&port->mode_ids); mutex_init(&port->port_type_lock); + mutex_init(&port->partner_link_lock); port->id = id; port->ops = cap->ops; diff --git a/drivers/usb/typec/class.h b/drivers/usb/typec/class.h index b3076a24ad2eee..db2fe96c48ff0f 100644 
--- a/drivers/usb/typec/class.h +++ b/drivers/usb/typec/class.h @@ -59,6 +59,7 @@ struct typec_port { enum typec_port_type port_type; enum usb_mode usb_mode; struct mutex port_type_lock; + struct mutex partner_link_lock; enum typec_orientation orientation; struct typec_switch *sw; diff --git a/drivers/usb/typec/tcpm/tcpci_maxim_core.c b/drivers/usb/typec/tcpm/tcpci_maxim_core.c index fd1b8059336764..648311f5e3cf13 100644 --- a/drivers/usb/typec/tcpm/tcpci_maxim_core.c +++ b/drivers/usb/typec/tcpm/tcpci_maxim_core.c @@ -166,7 +166,8 @@ static void process_rx(struct max_tcpci_chip *chip, u16 status) return; } - if (count > sizeof(struct pd_message) || count + 1 > TCPC_RECEIVE_BUFFER_LEN) { + if (count > sizeof(struct pd_message) + 1 || + count + 1 > TCPC_RECEIVE_BUFFER_LEN) { dev_err(chip->dev, "Invalid TCPC_RX_BYTE_CNT %d\n", count); return; } diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index a99db4e025cd04..214d45f8e55c21 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -596,6 +596,15 @@ struct pd_rx_event { enum tcpm_transmit_type rx_sop_type; }; +struct altmode_vdm_event { + struct kthread_work work; + struct tcpm_port *port; + u32 header; + u32 *data; + int cnt; + enum tcpm_transmit_type tx_sop_type; +}; + static const char * const pd_rev[] = { [PD_REV10] = "rev1", [PD_REV20] = "rev2", @@ -1608,18 +1617,68 @@ static void tcpm_queue_vdm(struct tcpm_port *port, const u32 header, mod_vdm_delayed_work(port, 0); } -static void tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, - const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) +static void tcpm_queue_vdm_work(struct kthread_work *work) { - if (port->state != SRC_READY && port->state != SNK_READY && - port->state != SRC_VDM_IDENTITY_REQUEST) - return; + struct altmode_vdm_event *event = container_of(work, + struct altmode_vdm_event, + work); + struct tcpm_port *port = event->port; mutex_lock(&port->lock); - 
tcpm_queue_vdm(port, header, data, cnt, tx_sop_type); + if (port->state != SRC_READY && port->state != SNK_READY && + port->state != SRC_VDM_IDENTITY_REQUEST) { + tcpm_log_force(port, "dropping altmode_vdm_event"); + goto port_unlock; + } + + tcpm_queue_vdm(port, event->header, event->data, event->cnt, event->tx_sop_type); + +port_unlock: + kfree(event->data); + kfree(event); mutex_unlock(&port->lock); } +static int tcpm_queue_vdm_unlocked(struct tcpm_port *port, const u32 header, + const u32 *data, int cnt, enum tcpm_transmit_type tx_sop_type) +{ + struct altmode_vdm_event *event; + u32 *data_cpy; + int ret = -ENOMEM; + + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + goto err_event; + + data_cpy = kcalloc(cnt, sizeof(u32), GFP_KERNEL); + if (!data_cpy) + goto err_data; + + kthread_init_work(&event->work, tcpm_queue_vdm_work); + event->port = port; + event->header = header; + memcpy(data_cpy, data, sizeof(u32) * cnt); + event->data = data_cpy; + event->cnt = cnt; + event->tx_sop_type = tx_sop_type; + + ret = kthread_queue_work(port->wq, &event->work); + if (!ret) { + ret = -EBUSY; + goto err_queue; + } + + return 0; + +err_queue: + kfree(data_cpy); +err_data: + kfree(event); +err_event: + tcpm_log_force(port, "failed to queue altmode vdm, err:%d", ret); + return ret; +} + static void svdm_consume_identity(struct tcpm_port *port, const u32 *p, int cnt) { u32 vdo = p[VDO_INDEX_IDH]; @@ -2830,8 +2889,7 @@ static int tcpm_altmode_enter(struct typec_altmode *altmode, u32 *vdo) header = VDO(altmode->svid, vdo ? 2 : 1, svdm_version, CMD_ENTER_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP); - return 0; + return tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 
1 : 0, TCPC_TX_SOP); } static int tcpm_altmode_exit(struct typec_altmode *altmode) @@ -2847,8 +2905,7 @@ static int tcpm_altmode_exit(struct typec_altmode *altmode) header = VDO(altmode->svid, 1, svdm_version, CMD_EXIT_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP); - return 0; + return tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP); } static int tcpm_altmode_vdm(struct typec_altmode *altmode, @@ -2856,9 +2913,7 @@ static int tcpm_altmode_vdm(struct typec_altmode *altmode, { struct tcpm_port *port = typec_altmode_get_drvdata(altmode); - tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP); - - return 0; + return tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP); } static const struct typec_altmode_ops tcpm_altmode_ops = { @@ -2882,8 +2937,7 @@ static int tcpm_cable_altmode_enter(struct typec_altmode *altmode, enum typec_pl header = VDO(altmode->svid, vdo ? 2 : 1, svdm_version, CMD_ENTER_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 1 : 0, TCPC_TX_SOP_PRIME); - return 0; + return tcpm_queue_vdm_unlocked(port, header, vdo, vdo ? 
1 : 0, TCPC_TX_SOP_PRIME); } static int tcpm_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop) @@ -2899,8 +2953,7 @@ static int tcpm_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plu header = VDO(altmode->svid, 1, svdm_version, CMD_EXIT_MODE); header |= VDO_OPOS(altmode->mode); - tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP_PRIME); - return 0; + return tcpm_queue_vdm_unlocked(port, header, NULL, 0, TCPC_TX_SOP_PRIME); } static int tcpm_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop, @@ -2908,9 +2961,7 @@ static int tcpm_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug { struct tcpm_port *port = typec_altmode_get_drvdata(altmode); - tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP_PRIME); - - return 0; + return tcpm_queue_vdm_unlocked(port, header, data, count - 1, TCPC_TX_SOP_PRIME); } static const struct typec_cable_ops tcpm_cable_ops = { @@ -5965,7 +6016,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, case SNK_TRY_WAIT_DEBOUNCE: if (!tcpm_port_is_sink(port)) { port->max_wait = 0; - tcpm_set_state(port, SRC_TRYWAIT, 0); + tcpm_set_state(port, SRC_TRYWAIT, PD_T_PD_DEBOUNCE); } break; case SRC_TRY_WAIT: diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 420af5139c70a3..8aae80b457d74d 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -54,7 +54,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) u8 cur = 0; int ret; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -100,7 +101,7 @@ static int ucsi_displayport_enter(struct typec_altmode *alt, u32 *vdo) schedule_work(&dp->work); ret = 0; err_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); 
return ret; } @@ -112,7 +113,8 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) u64 command; int ret = 0; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override) { const struct typec_altmode *p = typec_altmode_get_partner(alt); @@ -144,7 +146,7 @@ static int ucsi_displayport_exit(struct typec_altmode *alt) schedule_work(&dp->work); out_unlock: - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return ret; } @@ -202,20 +204,21 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, int cmd = PD_VDO_CMD(header); int svdm_version; - mutex_lock(&dp->con->lock); + if (!ucsi_con_mutex_lock(dp->con)) + return -ENOTCONN; if (!dp->override && dp->initialized) { const struct typec_altmode *p = typec_altmode_get_partner(alt); dev_warn(&p->dev, "firmware doesn't support alternate mode overriding\n"); - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return -EOPNOTSUPP; } svdm_version = typec_altmode_get_svdm_version(alt); if (svdm_version < 0) { - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return svdm_version; } @@ -259,7 +262,7 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, break; } - mutex_unlock(&dp->con->lock); + ucsi_con_mutex_unlock(dp->con); return 0; } @@ -296,6 +299,8 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) if (!dp) return; + cancel_work_sync(&dp->work); + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index e8c7e9dc49309c..01ce858a1a2b34 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1922,6 +1922,40 @@ void ucsi_set_drvdata(struct ucsi *ucsi, void *data) } EXPORT_SYMBOL_GPL(ucsi_set_drvdata); +/** + * ucsi_con_mutex_lock - Acquire the connector mutex + * @con: The connector interface to lock + * + * Returns true on success, false if the connector is disconnected + */ +bool 
ucsi_con_mutex_lock(struct ucsi_connector *con) +{ + bool mutex_locked = false; + bool connected = true; + + while (connected && !mutex_locked) { + mutex_locked = mutex_trylock(&con->lock) != 0; + connected = UCSI_CONSTAT(con, CONNECTED); + if (connected && !mutex_locked) + msleep(20); + } + + connected = connected && con->partner; + if (!connected && mutex_locked) + mutex_unlock(&con->lock); + + return connected; +} + +/** + * ucsi_con_mutex_unlock - Release the connector mutex + * @con: The connector interface to unlock + */ +void ucsi_con_mutex_unlock(struct ucsi_connector *con) +{ + mutex_unlock(&con->lock); +} + /** * ucsi_create - Allocate UCSI instance * @dev: Device interface to the PPM (Platform Policy Manager) diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3a2c1762bec1bf..70910232a05d74 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -94,6 +94,8 @@ int ucsi_register(struct ucsi *ucsi); void ucsi_unregister(struct ucsi *ucsi); void *ucsi_get_drvdata(struct ucsi *ucsi); void ucsi_set_drvdata(struct ucsi *ucsi, void *data); +bool ucsi_con_mutex_lock(struct ucsi_connector *con); +void ucsi_con_mutex_unlock(struct ucsi_connector *con); void ucsi_connector_change(struct ucsi *ucsi, u8 num); @@ -432,7 +434,7 @@ struct ucsi_debugfs_entry { u64 low; u64 high; } response; - u32 status; + int status; struct dentry *dentry; }; diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 451c639299eb3b..d12a350440d3ca 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -350,6 +350,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm) return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); } +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev) +{ + switch (vf_data->acc_magic) { + case ACC_DEV_MAGIC_V2: + if (vf_data->major_ver != ACC_DRV_MAJOR_VER) { + 
dev_info(dev, "migration driver version<%u.%u> not match!\n", + vf_data->major_ver, vf_data->minor_ver); + return -EINVAL; + } + break; + case ACC_DEV_MAGIC_V1: + /* Correct dma address */ + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; + break; + default: + return -EINVAL; + } + + return 0; +} + static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct hisi_acc_vf_migration_file *migf) { @@ -363,7 +389,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done) return 0; - if (vf_data->acc_magic != ACC_DEV_MAGIC) { + ret = vf_qm_version_check(vf_data, dev); + if (ret) { dev_err(dev, "failed to match ACC_DEV_MAGIC\n"); return -EINVAL; } @@ -399,13 +426,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, return -EINVAL; } - ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); - if (ret) { - dev_err(dev, "failed to write QM_VF_STATE\n"); - return ret; - } - - hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; hisi_acc_vdev->match_done = true; return 0; } @@ -418,7 +438,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, int vf_id = hisi_acc_vdev->vf_id; int ret; - vf_data->acc_magic = ACC_DEV_MAGIC; + vf_data->acc_magic = ACC_DEV_MAGIC_V2; + vf_data->major_ver = ACC_DRV_MAJOR_VER; + vf_data->minor_ver = ACC_DRV_MINOR_VER; /* Save device id */ vf_data->dev_id = hisi_acc_vdev->vf_dev->device; @@ -441,6 +463,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, return 0; } +static void vf_qm_xeqc_save(struct hisi_qm *qm, + struct hisi_acc_vf_migration_file *migf) +{ + struct 
acc_vf_data *vf_data = &migf->vf_data; + u16 eq_head, aeq_head; + + eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF; + qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0); + + aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF; + qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0); +} + static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct hisi_acc_vf_migration_file *migf) { @@ -456,6 +491,20 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (migf->total_length < sizeof(struct acc_vf_data)) return -EINVAL; + if (!vf_data->eqe_dma || !vf_data->aeqe_dma || + !vf_data->sqc_dma || !vf_data->cqc_dma) { + dev_info(dev, "resume dma addr is NULL!\n"); + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; + return 0; + } + + ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1); + if (ret) { + dev_err(dev, "failed to write QM_VF_STATE\n"); + return -EINVAL; + } + hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; + qm->eqe_dma = vf_data->eqe_dma; qm->aeqe_dma = vf_data->aeqe_dma; qm->sqc_dma = vf_data->sqc_dma; @@ -496,12 +545,12 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data) return -EINVAL; /* Every reg is 32 bit, the dma address is 64 bit. 
*/ - vf_data->eqe_dma = vf_data->qm_eqc_dw[1]; + vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->eqe_dma |= vf_data->qm_eqc_dw[0]; - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1]; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0]; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; /* Through SQC_BT/CQC_BT to get sqc and cqc address */ ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma); @@ -524,7 +573,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, { struct acc_vf_data *vf_data = &migf->vf_data; struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; - struct device *dev = &vf_qm->pdev->dev; int ret; if (unlikely(qm_wait_dev_not_ready(vf_qm))) { @@ -538,17 +586,14 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, vf_data->vf_qm_state = QM_READY; hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; - ret = vf_qm_cache_wb(vf_qm); - if (ret) { - dev_err(dev, "failed to writeback QM Cache!\n"); - return ret; - } - ret = vf_qm_read_data(vf_qm, vf_data); if (ret) return -EINVAL; migf->total_length = sizeof(struct acc_vf_data); + /* Save eqc and aeqc interrupt information */ + vf_qm_xeqc_save(vf_qm, migf); + return 0; } @@ -967,6 +1012,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev dev_err(dev, "failed to check QM INT state!\n"); return ret; } + + ret = vf_qm_cache_wb(vf_qm); + if (ret) { + dev_err(dev, "failed to writeback QM cache!\n"); + return ret; + } + return 0; } @@ -1463,6 +1515,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev); struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm; + hisi_acc_vf_disable_fds(hisi_acc_vdev); mutex_lock(&hisi_acc_vdev->open_mutex); 
hisi_acc_vdev->dev_opened = false; iounmap(vf_qm->io_base); @@ -1485,6 +1538,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1; hisi_acc_vdev->pf_qm = pf_qm; hisi_acc_vdev->vf_dev = pdev; + hisi_acc_vdev->vf_qm_state = QM_NOT_READY; mutex_init(&hisi_acc_vdev->state_mutex); mutex_init(&hisi_acc_vdev->open_mutex); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h index 245d7537b2bcd4..91002ceeebc18a 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -39,6 +39,9 @@ #define QM_REG_ADDR_OFFSET 0x0004 #define QM_XQC_ADDR_OFFSET 32U +#define QM_XQC_ADDR_LOW 0x1 +#define QM_XQC_ADDR_HIGH 0x2 + #define QM_VF_AEQ_INT_MASK 0x0004 #define QM_VF_EQ_INT_MASK 0x000c #define QM_IFC_INT_SOURCE_V 0x0020 @@ -50,10 +53,15 @@ #define QM_EQC_DW0 0X8000 #define QM_AEQC_DW0 0X8020 +#define ACC_DRV_MAJOR_VER 1 +#define ACC_DRV_MINOR_VER 0 + +#define ACC_DEV_MAGIC_V1 0XCDCDCDCDFEEDAACC +#define ACC_DEV_MAGIC_V2 0xAACCFEEDDECADEDE + struct acc_vf_data { #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state) /* QM match information */ -#define ACC_DEV_MAGIC 0XCDCDCDCDFEEDAACC u64 acc_magic; u32 qp_num; u32 dev_id; @@ -61,7 +69,9 @@ struct acc_vf_data { u32 qp_base; u32 vf_qm_state; /* QM reserved match information */ - u32 qm_rsv_state[3]; + u16 major_ver; + u16 minor_ver; + u32 qm_rsv_state[2]; /* QM RW regs */ u32 aeq_int_mask; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 14437396d72118..8f02f236b5b4ba 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1815,7 +1815,7 @@ int vfio_config_init(struct vfio_pci_core_device *vdev) } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx || - vdev->pdev->irq == IRQ_NOTCONNECTED) + !vdev->pdev->irq || vdev->pdev->irq == IRQ_NOTCONNECTED) 
vconfig[PCI_INTERRUPT_PIN] = 0; ret = vfio_cap_init(vdev); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 35f9046af315ff..6328c3a05bcdd4 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1646,14 +1646,14 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf, { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1); + unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT; + unsigned long pfn = vma_to_pfn(vma) + pgoff; vm_fault_t ret = VM_FAULT_SIGBUS; - pfn = vma_to_pfn(vma) + pgoff; - - if (order && (pfn & ((1 << order) - 1) || - vmf->address & ((PAGE_SIZE << order) - 1) || - vmf->address + (PAGE_SIZE << order) > vma->vm_end)) { + if (order && (addr < vma->vm_start || + addr + (PAGE_SIZE << order) > vma->vm_end || + pfn & ((1 << order) - 1))) { ret = VM_FAULT_FALLBACK; goto out; } diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0ac56072af9f23..ba5d91e576af16 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -293,7 +293,7 @@ static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) struct rb_node *p; for (p = rb_prev(n); p; p = rb_prev(p)) { - struct vfio_dma *dma = rb_entry(n, + struct vfio_dma *dma = rb_entry(p, struct vfio_dma, node); vfio_dma_bitmap_free(dma); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f6f5a7ac789455..26bcf3a7f70cb2 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -627,6 +627,9 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) int ret; llnode = llist_del_all(&svq->completion_list); + + mutex_lock(&svq->vq.mutex); + llist_for_each_entry_safe(cmd, t, llnode, tvc_completion_list) { se_cmd = &cmd->tvc_se_cmd; @@ -660,6 +663,8 @@ static void 
vhost_scsi_complete_cmd_work(struct vhost_work *work) vhost_scsi_release_cmd_res(se_cmd); } + mutex_unlock(&svq->vq.mutex); + if (signal) vhost_signal(&svq->vs->dev, &svq->vq); } @@ -994,39 +999,66 @@ static void vhost_scsi_target_queue_cmd(struct vhost_scsi_nexus *nexus, static void vhost_scsi_send_status(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned int out, u8 status) + struct vhost_scsi_ctx *vc, u8 status) { - struct virtio_scsi_cmd_resp __user *resp; struct virtio_scsi_cmd_resp rsp; + struct iov_iter iov_iter; int ret; memset(&rsp, 0, sizeof(rsp)); rsp.status = status; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, + sizeof(rsp)); + + ret = copy_to_iter(&rsp, sizeof(rsp), &iov_iter); + + if (likely(ret == sizeof(rsp))) + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else pr_err("Faulted on virtio_scsi_cmd_resp\n"); } +#define TYPE_IO_CMD 0 +#define TYPE_CTRL_TMF 1 +#define TYPE_CTRL_AN 2 + static void vhost_scsi_send_bad_target(struct vhost_scsi *vs, struct vhost_virtqueue *vq, - int head, unsigned out) + struct vhost_scsi_ctx *vc, int type) { - struct virtio_scsi_cmd_resp __user *resp; - struct virtio_scsi_cmd_resp rsp; + union { + struct virtio_scsi_cmd_resp cmd; + struct virtio_scsi_ctrl_tmf_resp tmf; + struct virtio_scsi_ctrl_an_resp an; + } rsp; + struct iov_iter iov_iter; + size_t rsp_size; int ret; memset(&rsp, 0, sizeof(rsp)); - rsp.response = VIRTIO_SCSI_S_BAD_TARGET; - resp = vq->iov[out].iov_base; - ret = __copy_to_user(resp, &rsp, sizeof(rsp)); - if (!ret) - vhost_add_used_and_signal(&vs->dev, vq, head, 0); + + if (type == TYPE_IO_CMD) { + rsp_size = sizeof(struct virtio_scsi_cmd_resp); + rsp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + } else if (type == TYPE_CTRL_TMF) { + rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp); + rsp.tmf.response = 
VIRTIO_SCSI_S_BAD_TARGET; + } else { + rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp); + rsp.an.response = VIRTIO_SCSI_S_BAD_TARGET; + } + + iov_iter_init(&iov_iter, ITER_DEST, &vq->iov[vc->out], vc->in, + rsp_size); + + ret = copy_to_iter(&rsp, rsp_size, &iov_iter); + + if (likely(ret == rsp_size)) + vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0); else - pr_err("Faulted on virtio_scsi_cmd_resp\n"); + pr_err("Faulted on virtio scsi type=%d\n", type); } static int @@ -1390,9 +1422,9 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (ret == -ENXIO) break; else if (ret == -EIO) - vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); + vhost_scsi_send_bad_target(vs, vq, &vc, TYPE_IO_CMD); else if (ret == -ENOMEM) - vhost_scsi_send_status(vs, vq, vc.head, vc.out, + vhost_scsi_send_status(vs, vq, &vc, SAM_STAT_TASK_SET_FULL); } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: @@ -1432,8 +1464,11 @@ static void vhost_scsi_tmf_resp_work(struct vhost_work *work) else resp_code = VIRTIO_SCSI_S_FUNCTION_REJECTED; + mutex_lock(&tmf->svq->vq.mutex); vhost_scsi_send_tmf_resp(tmf->vhost, &tmf->svq->vq, tmf->in_iovs, tmf->vq_desc, &tmf->resp_iov, resp_code); + mutex_unlock(&tmf->svq->vq.mutex); + vhost_scsi_release_tmf_res(tmf); } @@ -1623,7 +1658,10 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (ret == -ENXIO) break; else if (ret == -EIO) - vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out); + vhost_scsi_send_bad_target(vs, vq, &vc, + v_req.type == VIRTIO_SCSI_T_TMF ? 
+ TYPE_CTRL_TMF : + TYPE_CTRL_AN); } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c index 9afe701b2a1b64..a63bb42c8f8b03 100644 --- a/drivers/video/backlight/qcom-wled.c +++ b/drivers/video/backlight/qcom-wled.c @@ -1406,9 +1406,11 @@ static int wled_configure(struct wled *wled) wled->ctrl_addr = be32_to_cpu(*prop_addr); rc = of_property_read_string(dev->of_node, "label", &wled->name); - if (rc) + if (rc) { wled->name = devm_kasprintf(dev, GFP_KERNEL, "%pOFn", dev->of_node); - + if (!wled->name) + return -ENOMEM; + } switch (wled->version) { case 3: u32_opts = wled3_opts; diff --git a/drivers/video/fbdev/core/fbcvt.c b/drivers/video/fbdev/core/fbcvt.c index 64843464c66135..cd3821bd82e566 100644 --- a/drivers/video/fbdev/core/fbcvt.c +++ b/drivers/video/fbdev/core/fbcvt.c @@ -312,7 +312,7 @@ int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb) cvt.f_refresh = cvt.refresh; cvt.interlace = 1; - if (!cvt.xres || !cvt.yres || !cvt.refresh) { + if (!cvt.xres || !cvt.yres || !cvt.refresh || cvt.f_refresh > INT_MAX) { printk(KERN_INFO "fbcvt: Invalid input parameters\n"); return 1; } diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 150753c3b5782c..95d5d7993e5b1b 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -407,6 +407,12 @@ static void virtio_dev_shutdown(struct device *_d) if (!drv) return; + /* If the driver has its own shutdown method, use that. */ + if (drv->shutdown) { + drv->shutdown(dev); + return; + } + /* * Some devices get wedged if you kick them after they are * reset. Mark all vqs as broken to make sure we don't. 
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 5eaade7578606e..7182f43ed05515 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -48,6 +48,7 @@ void vp_modern_avq_done(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + unsigned int status_size = sizeof(struct virtio_admin_cmd_status); struct virtio_admin_cmd *cmd; unsigned long flags; unsigned int len; @@ -56,7 +57,17 @@ void vp_modern_avq_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((cmd = virtqueue_get_buf(vq, &len))) { - cmd->result_sg_size = len; + /* If the number of bytes written by the device is less + * than the size of struct virtio_admin_cmd_status, the + * remaining status bytes will remain zero-initialized, + * since the buffer was zeroed during allocation. + * In this case, set the size of command_specific_result + * to 0. + */ + if (len < status_size) + cmd->result_sg_size = 0; + else + cmd->result_sg_size = len - status_size; complete(&cmd->completion); } } while (!virtqueue_enable_cb(vq)); @@ -247,7 +258,7 @@ virtio_pci_admin_cmd_dev_parts_objects_enable(struct virtio_device *virtio_dev) sg_init_one(&data_sg, get_data, sizeof(*get_data)); sg_init_one(&result_sg, result, sizeof(*result)); cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_DEVICE_CAP_GET); - cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); cmd.data_sg = &data_sg; cmd.result_sg = &result_sg; ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); @@ -305,7 +316,7 @@ static void virtio_pci_admin_cmd_cap_init(struct virtio_device *virtio_dev) sg_init_one(&result_sg, data, sizeof(*data)); cmd.opcode = cpu_to_le16(VIRTIO_ADMIN_CMD_CAP_ID_LIST_QUERY); - cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SRIOV); + cmd.group_type = cpu_to_le16(VIRTIO_ADMIN_GROUP_TYPE_SELF); cmd.result_sg = 
&result_sg; ret = vp_modern_admin_cmd_exec(virtio_dev, &cmd); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fdd2d2b07b5a2a..b784aab6686703 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2650,7 +2650,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) - vq->event_triggered = false; + data_race(vq->event_triggered = false); return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); diff --git a/drivers/watchdog/exar_wdt.c b/drivers/watchdog/exar_wdt.c index 7c61ff34327116..c2e3bb08df899a 100644 --- a/drivers/watchdog/exar_wdt.c +++ b/drivers/watchdog/exar_wdt.c @@ -221,7 +221,7 @@ static const struct watchdog_info exar_wdt_info = { .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE, - .identity = "Exar/MaxLinear XR28V38x Watchdog", + .identity = "Exar XR28V38x Watchdog", }; static const struct watchdog_ops exar_wdt_ops = { diff --git a/drivers/watchdog/lenovo_se30_wdt.c b/drivers/watchdog/lenovo_se30_wdt.c index 024b842499b368..1c73bb7eeeeed1 100644 --- a/drivers/watchdog/lenovo_se30_wdt.c +++ b/drivers/watchdog/lenovo_se30_wdt.c @@ -271,6 +271,8 @@ static int lenovo_se30_wdt_probe(struct platform_device *pdev) return -EBUSY; priv->shm_base_addr = devm_ioremap(dev, base_phys, SHM_WIN_SIZE); + if (!priv->shm_base_addr) + return -ENOMEM; priv->wdt_cfg.mod = WDT_MODULE; priv->wdt_cfg.idx = WDT_CFG_INDEX; diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index f7d6f47971fdf8..24f485827e0399 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -278,7 +278,7 @@ config XEN_PRIVCMD_EVENTFD config XEN_ACPI_PROCESSOR tristate "Xen ACPI processor" - depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ + depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ default m help This ACPI processor uploads Power Management information to the 
Xen diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 65d4e7fa1eb8df..2de37dcd75566f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -679,7 +679,7 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages) } EXPORT_SYMBOL(xen_free_ballooned_pages); -static void __init balloon_add_regions(void) +static int __init balloon_add_regions(void) { unsigned long start_pfn, pages; unsigned long pfn, extra_pfn_end; @@ -702,26 +702,41 @@ static void __init balloon_add_regions(void) for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) balloon_append(pfn_to_page(pfn)); - balloon_stats.total_pages += extra_pfn_end - start_pfn; + /* + * Extra regions are accounted for in the physmap, but need + * decreasing from current_pages and target_pages to balloon + * down the initial allocation, because they are already + * accounted for in total_pages. + */ + pages = extra_pfn_end - start_pfn; + if (pages >= balloon_stats.current_pages || + pages >= balloon_stats.target_pages) { + WARN(1, "Extra pages underflow current target"); + return -ERANGE; + } + balloon_stats.current_pages -= pages; + balloon_stats.target_pages -= pages; } + + return 0; } static int __init balloon_init(void) { struct task_struct *task; + int rc; if (!xen_domain()) return -ENODEV; pr_info("Initialising balloon driver\n"); -#ifdef CONFIG_XEN_PV - balloon_stats.current_pages = xen_pv_domain() - ? 
min(xen_start_info->nr_pages - xen_released_pages, max_pfn) - : get_num_physpages(); -#else - balloon_stats.current_pages = get_num_physpages(); -#endif + if (xen_released_pages >= get_num_physpages()) { + WARN(1, "Released pages underflow current target"); + return -ERANGE; + } + + balloon_stats.current_pages = get_num_physpages() - xen_released_pages; balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; @@ -738,7 +753,9 @@ static int __init balloon_init(void) register_sysctl_init("xen/balloon", balloon_table); #endif - balloon_add_regions(); + rc = balloon_add_regions(); + if (rc) + return rc; task = kthread_run(balloon_thread, NULL, "xen-balloon"); if (IS_ERR(task)) { diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1f65795cf5d7a2..ef56a2500ed69a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -217,6 +217,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, * buffering it. 
*/ if (dma_capable(dev, dev_addr, size, true) && + !dma_kmalloc_needs_bounce(dev, size, dir) && !range_straddles_page_boundary(phys, size) && !xen_arch_need_swiotlb(dev, phys, dev_addr) && !is_swiotlb_force_bounce(dev)) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 13821e7e825efb..9ac0427724a301 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -77,6 +77,7 @@ enum xb_req_state { struct xb_req_data { struct list_head list; wait_queue_head_t wq; + struct kref kref; struct xsd_sockmsg msg; uint32_t caller_req_id; enum xsd_sockmsg_type type; @@ -103,6 +104,7 @@ int xb_init_comms(void); void xb_deinit_comms(void); int xs_watch_msg(struct xs_watch_event *event); void xs_request_exit(struct xb_req_data *req); +void xs_free_req(struct kref *kref); int xenbus_match(struct device *_dev, const struct device_driver *_drv); int xenbus_dev_probe(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index e5fda0256feb3d..82df2da1b880b8 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -309,8 +309,8 @@ static int process_msg(void) virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); - } else - kfree(req); + } + kref_put(&req->kref, xs_free_req); } mutex_unlock(&xs_response_mutex); @@ -386,14 +386,13 @@ static int process_writes(void) state.req->msg.type = XS_ERROR; state.req->err = err; list_del(&state.req->list); - if (state.req->state == xb_req_state_aborted) - kfree(state.req); - else { + if (state.req->state != xb_req_state_aborted) { /* write err, then update state */ virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } + kref_put(&state.req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 46f8916597e53d..f5c21ba64df571 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ 
b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -406,7 +406,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req) mutex_unlock(&u->reply_mutex); kfree(req->body); - kfree(req); + kref_put(&req->kref, xs_free_req); kref_put(&u->kref, xenbus_file_free); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 6d32ffb0113650..86fe6e77905669 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -966,9 +966,15 @@ static int __init xenbus_init(void) if (xen_pv_domain()) xen_store_domain_type = XS_PV; if (xen_hvm_domain()) + { xen_store_domain_type = XS_HVM; - if (xen_hvm_domain() && xen_initial_domain()) - xen_store_domain_type = XS_LOCAL; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + if (!v && xen_initial_domain()) + xen_store_domain_type = XS_LOCAL; + } if (xen_pv_domain() && !xen_start_info->store_evtchn) xen_store_domain_type = XS_LOCAL; if (xen_pv_domain() && xen_start_info->store_evtchn) @@ -987,10 +993,6 @@ static int __init xenbus_init(void) xen_store_interface = gfn_to_virt(xen_store_gfn); break; case XS_HVM: - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index fcb335bb7b1878..6d1819269cbe53 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -513,4 +513,5 @@ static int __init boot_wait_for_devices(void) late_initcall(boot_wait_for_devices); #endif +MODULE_DESCRIPTION("Xen PV-device frontend support"); MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d32c726f7a12d0..dcf9182c8451ad 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -112,6 +112,12 
@@ static void xs_suspend_exit(void) wake_up_all(&xs_state_enter_wq); } +void xs_free_req(struct kref *kref) +{ + struct xb_req_data *req = container_of(kref, struct xb_req_data, kref); + kfree(req); +} + static uint32_t xs_request_enter(struct xb_req_data *req) { uint32_t rq_id; @@ -237,6 +243,12 @@ static void xs_send(struct xb_req_data *req, struct xsd_sockmsg *msg) req->caller_req_id = req->msg.req_id; req->msg.req_id = xs_request_enter(req); + /* + * Take 2nd ref. One for this thread, and the second for the + * xenbus_thread. + */ + kref_get(&req->kref); + mutex_lock(&xb_write_mutex); list_add_tail(&req->list, &xb_write_list); notify = list_is_singular(&xb_write_list); @@ -261,8 +273,8 @@ static void *xs_wait_for_reply(struct xb_req_data *req, struct xsd_sockmsg *msg) if (req->state == xb_req_state_queued || req->state == xb_req_state_wait_reply) req->state = xb_req_state_aborted; - else - kfree(req); + + kref_put(&req->kref, xs_free_req); mutex_unlock(&xb_write_mutex); return ret; @@ -291,6 +303,7 @@ int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void *par) req->cb = xenbus_dev_queue_reply; req->par = par; req->user_req = true; + kref_init(&req->kref); xs_send(req, msg); @@ -319,6 +332,7 @@ static void *xs_talkv(struct xenbus_transaction t, req->num_vecs = num_vecs; req->cb = xs_wake_up; req->user_req = false; + kref_init(&req->kref); msg.req_id = 0; msg.tx_id = t.id; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 32619d146cbc19..862164181baca1 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -59,7 +59,7 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq) len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); if (len > 0) __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_write_subrequest_terminated(subreq, len ?: err, false); + netfs_write_subrequest_terminated(subreq, len ?: err); } /** @@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the 
file size, any portion not in * cache won't be on server and is zeroes */ - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (pos + total >= i_size_read(rreq->inode)) __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); @@ -164,4 +165,5 @@ const struct address_space_operations v9fs_addr_operations = { .invalidate_folio = netfs_invalidate_folio, .direct_IO = noop_direct_IO, .writepages = netfs_writepages, + .migrate_folio = filemap_migrate_folio, }; diff --git a/fs/Kconfig b/fs/Kconfig index c718b2e2de0ec5..5b4847bd2fbb57 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -368,6 +368,7 @@ config GRACE_PERIOD config LOCKD tristate depends on FILE_LOCKING + select CRC32 select GRACE_PERIOD config LOCKD_V4 diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 691e0ae607a167..8c6130789fde3a 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -348,9 +348,9 @@ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) } if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { - rcu_read_lock(); + down_read(&net->cells_lock); ret = afs_dynroot_readdir_cells(net, ctx); - rcu_read_unlock(); + up_read(&net->cells_lock); } return ret; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 18b0a9f1615e44..2e7526ea883ae2 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work) #if 0 // Error injection if (subreq->debug_index == 3) - return netfs_write_subrequest_terminated(subreq, -ENOANO, false); + return netfs_write_subrequest_terminated(subreq, -ENOANO); if (!subreq->retry_count) { set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); } #endif op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); if (IS_ERR(op)) - return 
netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); afs_op_set_vnode(op, 0, vnode); op->file[0].dv_delta = 1; @@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work) break; } - netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len); } void afs_issue_write(struct netfs_io_subrequest *subreq) @@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st case NETFS_READ_GAPS: case NETFS_READ_SINGLE: case NETFS_READ_FOR_WRITE: + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: return; default: diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index bf1c94e51dd064..07709b0d7688fc 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -4,7 +4,7 @@ config BCACHEFS_FS depends on BLOCK select EXPORTFS select CLOSURES - select LIBCRC32C + select CRC32 select CRC64 select FS_POSIX_ACL select LZ4_COMPRESS @@ -15,10 +15,9 @@ config BCACHEFS_FS select ZLIB_INFLATE select ZSTD_COMPRESS select ZSTD_DECOMPRESS - select CRYPTO select CRYPTO_LIB_SHA256 - select CRYPTO_CHACHA20 - select CRYPTO_POLY1305 + select CRYPTO_LIB_CHACHA + select CRYPTO_LIB_POLY1305 select KEYS select RAID6_PQ select XOR_BLOCKS diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 7c930ef7738040..7ec022e9361ae1 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1422,8 +1422,30 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &wp->ptrs, ob, i) + open_bucket_for_each(c, &wp->ptrs, ob, i) { + /* + * Ensure proper write alignment - either due to misaligned + * bucket sizes (from buggy bcachefs-tools), or writes that mix + * logical/physical alignment: + */ + struct bch_dev *ca = ob_dev(c, ob); + u64 offset = bucket_to_sector(ca, ob->bucket) + + 
ca->mi.bucket_size - + ob->sectors_free; + unsigned align = round_up(offset, block_sectors(c)) - offset; + + ob->sectors_free = max_t(int, 0, ob->sectors_free - align); + wp->sectors_free = min(wp->sectors_free, ob->sectors_free); + } + + wp->sectors_free = rounddown(wp->sectors_free, block_sectors(c)); + + /* Did alignment use up space in an open_bucket? */ + if (unlikely(!wp->sectors_free)) { + bch2_alloc_sectors_done(c, wp); + goto retry; + } BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX); diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 69ec6a012898e6..4c1e33cf57c03b 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -110,7 +110,9 @@ static inline void bch2_alloc_sectors_done_inlined(struct bch_fs *c, struct writ unsigned i; open_bucket_for_each(c, &wp->ptrs, ob, i) - ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob); + ob_push(c, ob->sectors_free < block_sectors(c) + ? &ptrs + : &keep, ob); wp->ptrs = keep; mutex_unlock(&wp->lock); diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index ff26bb51515004..5f195d2280a4ea 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -192,7 +192,8 @@ static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, static int backpointer_target_not_found(struct btree_trans *trans, struct bkey_s_c_backpointer bp, struct bkey_s_c target_k, - struct bkey_buf *last_flushed) + struct bkey_buf *last_flushed, + bool commit) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -228,18 +229,77 @@ static int backpointer_target_not_found(struct btree_trans *trans, } if (fsck_err(trans, backpointer_to_missing_ptr, - "%s", buf.buf)) + "%s", buf.buf)) { ret = bch2_backpointer_del(trans, bp.k->p); + if (ret || !commit) + goto out; + + /* + * Normally, on transaction commit from inside a transaction, + * we'll return -BCH_ERR_transaction_restart_nested, since a + * transaction commit invalidates 
pointers given out by peek(). + * + * However, since we're updating a write buffer btree, if we + * return a transaction restart and loop we won't see that the + * backpointer has been deleted without an additional write + * buffer flush - and those are expensive. + * + * So we're relying on the caller immediately advancing to the + * next backpointer and starting a new transaction immediately + * after backpointer_get_key() returns NULL: + */ + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + } +out: fsck_err: printbuf_exit(&buf); return ret; } -struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, - struct bkey_s_c_backpointer bp, - struct btree_iter *iter, - unsigned iter_flags, - struct bkey_buf *last_flushed) +static struct btree *__bch2_backpointer_get_node(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + struct bkey_buf *last_flushed, + bool commit) +{ + struct bch_fs *c = trans->c; + + BUG_ON(!bp.v->level); + + bch2_trans_node_iter_init(trans, iter, + bp.v->btree_id, + bp.v->pos, + 0, + bp.v->level - 1, + 0); + struct btree *b = bch2_btree_iter_peek_node(trans, iter); + if (IS_ERR_OR_NULL(b)) + goto err; + + BUG_ON(b->c.level != bp.v->level - 1); + + if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, + bkey_i_to_s_c(&b->key), bp)) + return b; + + if (btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { + int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), + last_flushed, commit); + b = ret ? 
ERR_PTR(ret) : NULL; + } +err: + bch2_trans_iter_exit(trans, iter); + return b; +} + +static struct bkey_s_c __bch2_backpointer_get_key(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + unsigned iter_flags, + struct bkey_buf *last_flushed, + bool commit) { struct bch_fs *c = trans->c; @@ -277,10 +337,10 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, bch2_trans_iter_exit(trans, iter); if (!bp.v->level) { - int ret = backpointer_target_not_found(trans, bp, k, last_flushed); + int ret = backpointer_target_not_found(trans, bp, k, last_flushed, commit); return ret ? bkey_s_c_err(ret) : bkey_s_c_null; } else { - struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); + struct btree *b = __bch2_backpointer_get_node(trans, bp, iter, last_flushed, commit); if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) return bkey_s_c_null; if (IS_ERR_OR_NULL(b)) @@ -295,35 +355,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, struct btree_iter *iter, struct bkey_buf *last_flushed) { - struct bch_fs *c = trans->c; - - BUG_ON(!bp.v->level); - - bch2_trans_node_iter_init(trans, iter, - bp.v->btree_id, - bp.v->pos, - 0, - bp.v->level - 1, - 0); - struct btree *b = bch2_btree_iter_peek_node(trans, iter); - if (IS_ERR_OR_NULL(b)) - goto err; - - BUG_ON(b->c.level != bp.v->level - 1); - - if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, - bkey_i_to_s_c(&b->key), bp)) - return b; + return __bch2_backpointer_get_node(trans, bp, iter, last_flushed, true); +} - if (btree_node_will_make_reachable(b)) { - b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); - } else { - int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); - b = ret ? 
ERR_PTR(ret) : NULL; - } -err: - bch2_trans_iter_exit(trans, iter); - return b; +struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + struct btree_iter *iter, + unsigned iter_flags, + struct bkey_buf *last_flushed) +{ + return __bch2_backpointer_get_key(trans, bp, iter, iter_flags, last_flushed, true); } static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, @@ -521,7 +562,7 @@ static int check_bp_exists(struct btree_trans *trans, struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); struct bkey_s_c other_extent = - bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); + __bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL, false); ret = bkey_err(other_extent); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) ret = 0; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5d9f208a1bb744..75f7408da1736b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -788,6 +788,8 @@ struct bch_fs { unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; + DARRAY(enum bcachefs_metadata_version) + incompat_versions_requested; #ifdef CONFIG_UNICODE struct unicode_map *cf_encoding; @@ -981,8 +983,8 @@ struct bch_fs { mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; size_t zstd_workspace_size; - struct crypto_sync_skcipher *chacha20; - struct crypto_shash *poly1305; + struct bch_key chacha20_key; + bool chacha20_key_set; atomic64_t key_version; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a3db328dee31f5..d6e4a496f02b64 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -366,6 +366,10 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct bkey_i key; __u64 key ## _pad[pad] +enum bch_bkey_type_flags { + BKEY_TYPE_strict_btree_checks = BIT(0), +}; + /* * - DELETED 
keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. @@ -383,46 +387,46 @@ static inline void bkey_init(struct bkey *k) * * - WHITEOUT: for hash table btrees */ -#define BCH_BKEY_TYPES() \ - x(deleted, 0) \ - x(whiteout, 1) \ - x(error, 2) \ - x(cookie, 3) \ - x(hash_whiteout, 4) \ - x(btree_ptr, 5) \ - x(extent, 6) \ - x(reservation, 7) \ - x(inode, 8) \ - x(inode_generation, 9) \ - x(dirent, 10) \ - x(xattr, 11) \ - x(alloc, 12) \ - x(quota, 13) \ - x(stripe, 14) \ - x(reflink_p, 15) \ - x(reflink_v, 16) \ - x(inline_data, 17) \ - x(btree_ptr_v2, 18) \ - x(indirect_inline_data, 19) \ - x(alloc_v2, 20) \ - x(subvolume, 21) \ - x(snapshot, 22) \ - x(inode_v2, 23) \ - x(alloc_v3, 24) \ - x(set, 25) \ - x(lru, 26) \ - x(alloc_v4, 27) \ - x(backpointer, 28) \ - x(inode_v3, 29) \ - x(bucket_gens, 30) \ - x(snapshot_tree, 31) \ - x(logged_op_truncate, 32) \ - x(logged_op_finsert, 33) \ - x(accounting, 34) \ - x(inode_alloc_cursor, 35) +#define BCH_BKEY_TYPES() \ + x(deleted, 0, 0) \ + x(whiteout, 1, 0) \ + x(error, 2, 0) \ + x(cookie, 3, 0) \ + x(hash_whiteout, 4, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr, 5, BKEY_TYPE_strict_btree_checks) \ + x(extent, 6, BKEY_TYPE_strict_btree_checks) \ + x(reservation, 7, BKEY_TYPE_strict_btree_checks) \ + x(inode, 8, BKEY_TYPE_strict_btree_checks) \ + x(inode_generation, 9, BKEY_TYPE_strict_btree_checks) \ + x(dirent, 10, BKEY_TYPE_strict_btree_checks) \ + x(xattr, 11, BKEY_TYPE_strict_btree_checks) \ + x(alloc, 12, BKEY_TYPE_strict_btree_checks) \ + x(quota, 13, BKEY_TYPE_strict_btree_checks) \ + x(stripe, 14, BKEY_TYPE_strict_btree_checks) \ + x(reflink_p, 15, BKEY_TYPE_strict_btree_checks) \ + x(reflink_v, 16, BKEY_TYPE_strict_btree_checks) \ + x(inline_data, 17, BKEY_TYPE_strict_btree_checks) \ + x(btree_ptr_v2, 18, BKEY_TYPE_strict_btree_checks) \ + x(indirect_inline_data, 19, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v2, 20, 
BKEY_TYPE_strict_btree_checks) \ + x(subvolume, 21, BKEY_TYPE_strict_btree_checks) \ + x(snapshot, 22, BKEY_TYPE_strict_btree_checks) \ + x(inode_v2, 23, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v3, 24, BKEY_TYPE_strict_btree_checks) \ + x(set, 25, 0) \ + x(lru, 26, BKEY_TYPE_strict_btree_checks) \ + x(alloc_v4, 27, BKEY_TYPE_strict_btree_checks) \ + x(backpointer, 28, BKEY_TYPE_strict_btree_checks) \ + x(inode_v3, 29, BKEY_TYPE_strict_btree_checks) \ + x(bucket_gens, 30, BKEY_TYPE_strict_btree_checks) \ + x(snapshot_tree, 31, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_truncate, 32, BKEY_TYPE_strict_btree_checks) \ + x(logged_op_finsert, 33, BKEY_TYPE_strict_btree_checks) \ + x(accounting, 34, BKEY_TYPE_strict_btree_checks) \ + x(inode_alloc_cursor, 35, BKEY_TYPE_strict_btree_checks) enum bch_bkey_type { -#define x(name, nr) KEY_TYPE_##name = nr, +#define x(name, nr, ...) KEY_TYPE_##name = nr, BCH_BKEY_TYPES() #undef x KEY_TYPE_MAX, @@ -863,6 +867,7 @@ LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED, LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4); LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); +LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 15c93576b5c204..00d05ccfaf73bb 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -21,7 +21,7 @@ #include "xattr.h" const char * const bch2_bkey_types[] = { -#define x(name, nr) #name, +#define x(name, nr, ...) #name, BCH_BKEY_TYPES() #undef x NULL @@ -115,7 +115,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ }) const struct bkey_ops bch2_bkey_ops[] = { -#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, +#define x(name, nr, ...) 
[KEY_TYPE_##name] = bch2_bkey_ops_##name, BCH_BKEY_TYPES() #undef x }; @@ -155,6 +155,12 @@ static u64 bch2_key_types_allowed[] = { #undef x }; +static const enum bch_bkey_type_flags bch2_bkey_type_flags[] = { +#define x(name, nr, flags) [KEY_TYPE_##name] = flags, + BCH_BKEY_TYPES() +#undef x +}; + const char *bch2_btree_node_type_str(enum btree_node_type type) { return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); @@ -177,8 +183,18 @@ int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, if (type >= BKEY_TYPE_NR) return 0; - bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && - (type == BKEY_TYPE_btree || (from.flags & BCH_VALIDATE_commit)) && + enum bch_bkey_type_flags bkey_flags = k.k->type < KEY_TYPE_MAX + ? bch2_bkey_type_flags[k.k->type] + : 0; + + bool strict_key_type_allowed = + (from.flags & BCH_VALIDATE_commit) || + type == BKEY_TYPE_btree || + (from.btree < BTREE_ID_NR && + (bkey_flags & BKEY_TYPE_strict_btree_checks)); + + bkey_fsck_err_on(strict_key_type_allowed && + k.k->type < KEY_TYPE_MAX && !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 9b80201c7982f5..89989129579728 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -852,7 +852,6 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea b->sib_u64s[1] = 0; b->whiteout_u64s = 0; bch2_btree_keys_init(b); - set_btree_node_accessed(b); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], start_time); @@ -1286,6 +1285,10 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, six_unlock_read(&b->c.lock); goto retry; } + + /* avoid atomic set bit if it's not needed: */ + if (!btree_node_accessed(b)) + set_btree_node_accessed(b); } /* XXX: waiting on IO with btree locks held: */ @@ -1301,10 +1304,6 @@ struct btree *bch2_btree_node_get_noiter(struct 
btree_trans *trans, prefetch(p + L1_CACHE_BYTES * 2); } - /* avoid atomic set bit if it's not needed: */ - if (!btree_node_accessed(b)) - set_btree_node_accessed(b); - if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 7b98ba2dec6472..37b69d89341f4f 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -47,6 +47,27 @@ #define DROP_PREV_NODE 11 #define DID_FILL_FROM_SCAN 12 +/* + * Returns false if it's a btree we can easily reconstruct, or otherwise won't + * cause data loss if it's missing; returns true for all other btrees: + */ +static bool btree_id_important(enum btree_id btree) +{ + if (btree_id_is_alloc(btree)) + return false; + + switch (btree) { + case BTREE_ID_quotas: + case BTREE_ID_snapshot_trees: + case BTREE_ID_logged_ops: + case BTREE_ID_rebalance_work: + case BTREE_ID_subvolume_children: + return false; + default: + return true; + } +} + static const char * const bch2_gc_phase_strs[] = { #define x(n) #n, GC_PHASES() @@ -534,8 +555,10 @@ int bch2_check_topology(struct bch_fs *c) r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { - mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? 
FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 5fd4a58d2ad27d..60782f3e5aec84 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -41,6 +41,7 @@ void bch2_btree_node_io_unlock(struct btree *b) clear_btree_node_write_in_flight_inner(b); clear_btree_node_write_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); } @@ -1400,6 +1401,7 @@ static void btree_node_read_work(struct work_struct *work) printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1595,6 +1597,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) printbuf_exit(&buf); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); } @@ -1721,6 +1724,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, set_btree_node_read_error(b); bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); printbuf_exit(&buf); return; @@ -2061,8 +2065,10 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start if (new & (1U << BTREE_NODE_write_in_flight)) __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); - else + else { + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); + } } static void btree_node_write_done(struct bch_fs *c, struct btree *b, u64 start_time) @@ -2175,6 +2181,7 @@ static void btree_node_write_endio(struct bio *bio) } clear_btree_node_write_in_flight_inner(b); + smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); 
queue_work(c->btree_io_complete_wq, &wb->work); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index e34e9598ef25ab..ac5f2046550da3 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1162,7 +1162,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, } if (path->cached) { - ret = bch2_btree_path_traverse_cached(trans, path, flags); + ret = bch2_btree_path_traverse_cached(trans, path_idx, flags); goto out; } @@ -1971,6 +1971,12 @@ struct btree *bch2_btree_iter_next_node(struct btree_trans *trans, struct btree_ return NULL; } + /* + * We don't correctly handle nodes with extra intent locks here: + * downgrade so we don't violate locking invariants + */ + bch2_btree_path_downgrade(trans, path); + if (!bch2_btree_node_relock(trans, path, path->level + 1)) { __bch2_btree_path_unlock(trans, path); path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock); @@ -2577,7 +2583,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct struct bpos end) { if ((iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots)) && - !bkey_eq(iter->pos, POS_MAX)) { + !bkey_eq(iter->pos, POS_MAX) && + !((iter->flags & BTREE_ITER_is_extents) && + iter->pos.offset == U64_MAX)) { + /* * bkey_start_pos(), for extents, is not monotonically * increasing until after filtering for snapshots: @@ -2602,7 +2611,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_trans *trans, struct bch2_trans_verify_not_unlocked_or_in_restart(trans); bch2_btree_iter_verify_entry_exit(iter); - EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && iter->pos.inode != end.inode); int ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { @@ -2740,7 +2749,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre ret = trans_maybe_inject_restart(trans, _RET_IP_); if (unlikely(ret)) { k = 
bkey_s_c_err(ret); - goto out_no_locked; + goto out; } /* extents can't span inode numbers: */ @@ -2760,13 +2769,15 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); if (unlikely(ret)) { k = bkey_s_c_err(ret); - goto out_no_locked; + goto out; } struct btree_path *path = btree_iter_path(trans, iter); if (unlikely(!btree_path_node(path, path->level))) return bkey_s_c_null; + btree_path_set_should_be_locked(trans, path); + if ((iter->flags & BTREE_ITER_cached) || !(iter->flags & (BTREE_ITER_is_extents|BTREE_ITER_filter_snapshots))) { k = bkey_s_c_null; @@ -2787,12 +2798,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre if (!bkey_err(k)) iter->k = *k.k; /* We're not returning a key from iter->path: */ - goto out_no_locked; + goto out; } - k = bch2_btree_path_peek_slot(trans->paths + iter->path, &iter->k); + k = bch2_btree_path_peek_slot(btree_iter_path(trans, iter), &iter->k); if (unlikely(!k.k)) - goto out_no_locked; + goto out; if (unlikely(k.k->type == KEY_TYPE_whiteout && (iter->flags & BTREE_ITER_filter_snapshots) && @@ -2830,7 +2841,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } if (unlikely(bkey_err(k))) - goto out_no_locked; + goto out; next = k.k ? 
bkey_start_pos(k.k) : POS_MAX; @@ -2852,8 +2863,6 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre } } out: - btree_path_set_should_be_locked(trans, btree_iter_path(trans, iter)); -out_no_locked: bch2_btree_iter_verify_entry_exit(iter); bch2_btree_iter_verify(trans, iter); ret = bch2_btree_iter_verify_ret(trans, iter, k); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index d1ad1a7613c9f7..ade3b5addd7592 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -288,7 +288,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, .size = max_t(size_t, keys->size, 8) * 2, }; - new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL); + new_keys.data = bch2_kvmalloc(new_keys.size * sizeof(new_keys.data[0]), GFP_KERNEL); if (!new_keys.data) { bch_err(c, "%s: error allocating new key array (size %zu)", __func__, new_keys.size); @@ -644,8 +644,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans, */ static int journal_sort_key_cmp(const void *_l, const void *_r) { - cond_resched(); - const struct journal_key *l = _l; const struct journal_key *r = _r; @@ -689,7 +687,8 @@ void bch2_journal_keys_put(struct bch_fs *c) static void __journal_keys_sort(struct journal_keys *keys) { - sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL); + sort_nonatomic(keys->data, keys->nr, sizeof(keys->data[0]), + journal_sort_key_cmp, NULL); cond_resched(); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 2b186584a291bb..669825f89cdd84 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -301,9 +301,11 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans } static noinline int btree_key_cache_fill(struct btree_trans *trans, - struct btree_path *ck_path, + btree_path_idx_t ck_path_idx, unsigned flags) { + struct 
btree_path *ck_path = trans->paths + ck_path_idx; + if (flags & BTREE_ITER_cached_nofill) { ck_path->l[0].b = NULL; return 0; @@ -325,6 +327,7 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, goto err; /* Recheck after btree lookup, before allocating: */ + ck_path = trans->paths + ck_path_idx; ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0; if (unlikely(ret)) goto out; @@ -344,10 +347,11 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans, } static inline int btree_path_traverse_cached_fast(struct btree_trans *trans, - struct btree_path *path) + btree_path_idx_t path_idx) { struct bch_fs *c = trans->c; struct bkey_cached *ck; + struct btree_path *path = trans->paths + path_idx; retry: ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos); if (!ck) @@ -373,27 +377,32 @@ static inline int btree_path_traverse_cached_fast(struct btree_trans *trans, return 0; } -int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path, +int bch2_btree_path_traverse_cached(struct btree_trans *trans, + btree_path_idx_t path_idx, unsigned flags) { - EBUG_ON(path->level); - - path->l[1].b = NULL; + EBUG_ON(trans->paths[path_idx].level); int ret; do { - ret = btree_path_traverse_cached_fast(trans, path); + ret = btree_path_traverse_cached_fast(trans, path_idx); if (unlikely(ret == -ENOENT)) - ret = btree_key_cache_fill(trans, path, flags); + ret = btree_key_cache_fill(trans, path_idx, flags); } while (ret == -EEXIST); + struct btree_path *path = trans->paths + path_idx; + if (unlikely(ret)) { path->uptodate = BTREE_ITER_NEED_TRAVERSE; if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) { btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } + } else { + BUG_ON(path->uptodate); + BUG_ON(!path->nodes_locked); } + return ret; } diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index 51d6289b8dee32..82d8c72512a93a 100644 --- 
a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -40,8 +40,7 @@ int bch2_btree_key_cache_journal_flush(struct journal *, struct bkey_cached * bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos); -int bch2_btree_path_traverse_cached(struct btree_trans *, struct btree_path *, - unsigned); +int bch2_btree_path_traverse_cached(struct btree_trans *, btree_path_idx_t, unsigned); bool bch2_btree_insert_key_cached(struct btree_trans *, unsigned, struct btree_insert_entry *); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 8c9fdb7263fea5..86acf037590ccc 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -183,7 +183,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, return; if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) { - if (!c->chacha20) + if (!c->chacha20_key_set) return; struct nonce nonce = btree_nonce(&bn->keys, 0); @@ -398,7 +398,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) bch2_print_string_as_lines(KERN_INFO, buf.buf); } - sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); + sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL); dst = 0; darray_for_each(f->nodes, i) { @@ -418,7 +418,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) } f->nodes.nr = dst; - sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); + sort_nonatomic(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); if (0 && c->opts.verbose) { printbuf_reset(&buf); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 55fbeeb8eaaa1c..00307356d7c814 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1221,7 +1221,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = 
bch2_disk_reservation_get(c, &as->disk_res, (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c), - c->opts.metadata_replicas, + READ_ONCE(c->opts.metadata_replicas), disk_res_flags); if (ret) goto err; @@ -1389,7 +1389,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, printbuf_exit(&buf); } -static void +static int bch2_btree_insert_keys_interior(struct btree_update *as, struct btree_trans *trans, struct btree_path *path, @@ -1411,7 +1411,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, insert = bkey_next(insert)) bch2_insert_fixup_btree_ptr(as, trans, path, b, &node_iter, insert); - if (bch2_btree_node_check_topology(trans, b)) { + int ret = bch2_btree_node_check_topology(trans, b); + if (ret) { struct printbuf buf = PRINTBUF; for (struct bkey_i *k = keys->keys; @@ -1421,11 +1422,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as, prt_newline(&buf); } - panic("%s(): check_topology error: inserted keys\n%s", __func__, buf.buf); + bch2_fs_fatal_error(as->c, "%ps -> %s(): check_topology error %s: inserted keys\n%s", + (void *) _RET_IP_, __func__, bch2_err_str(ret), buf.buf); + dump_stack(); + return ret; } memmove_u64s_down(keys->keys, insert, keys->top_p - insert->_data); keys->top_p -= insert->_data - keys->keys_p; + return 0; } static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos) @@ -1559,11 +1564,11 @@ static void __btree_split_node(struct btree_update *as, * nodes that were coalesced, and thus in the middle of a child node post * coalescing: */ -static void btree_split_insert_keys(struct btree_update *as, - struct btree_trans *trans, - btree_path_idx_t path_idx, - struct btree *b, - struct keylist *keys) +static int btree_split_insert_keys(struct btree_update *as, + struct btree_trans *trans, + btree_path_idx_t path_idx, + struct btree *b, + struct keylist *keys) { struct btree_path *path = trans->paths + path_idx; @@ -1573,8 +1578,12 @@ static void btree_split_insert_keys(struct btree_update *as, 
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p); - bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + int ret = bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys); + if (ret) + return ret; } + + return 0; } static int btree_split(struct btree_update *as, struct btree_trans *trans, @@ -1607,8 +1616,10 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, __btree_split_node(as, trans, b, n, keys); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); - btree_split_insert_keys(as, trans, path, n2, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys) ?: + btree_split_insert_keys(as, trans, path, n2, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1654,7 +1665,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n3->sib_u64s[0] = U16_MAX; n3->sib_u64s[1] = U16_MAX; - btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + ret = btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); + if (ret) + goto err; } } else { trace_and_count(c, btree_node_compact, trans, b); @@ -1662,7 +1675,9 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans, n1 = bch2_btree_node_alloc_replacement(as, trans, b); if (keys) { - btree_split_insert_keys(as, trans, path, n1, keys); + ret = btree_split_insert_keys(as, trans, path, n1, keys); + if (ret) + goto err; BUG_ON(!bch2_keylist_empty(keys)); } @@ -1809,15 +1824,15 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t goto split; } - ret = bch2_btree_node_check_topology(trans, b); + + ret = bch2_btree_node_check_topology(trans, b) ?: + bch2_btree_insert_keys_interior(as, trans, path, b, + path->l[b->c.level].iter, keys); if (ret) { bch2_btree_node_unlock_write(trans, path, b); return ret; } - bch2_btree_insert_keys_interior(as, trans, path, b, - path->l[b->c.level].iter, keys); - 
trans_for_each_path_with_node(trans, b, linked, i) bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index adbe576ec77ee3..0941fb2c026d54 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -428,10 +428,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) */ trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr); - sort(wb->flushing.keys.data, - wb->flushing.keys.nr, - sizeof(wb->flushing.keys.data[0]), - wb_key_seq_cmp, NULL); + sort_nonatomic(wb->flushing.keys.data, + wb->flushing.keys.nr, + sizeof(wb->flushing.keys.data[0]), + wb_key_seq_cmp, NULL); darray_for_each(wb->flushing.keys, i) { if (!i->journal_seq) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index fea61e60a9eea8..31fbc2716d8bf5 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -37,7 +37,8 @@ void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) void bch2_dev_usage_full_read_fast(struct bch_dev *ca, struct bch_dev_usage_full *usage) { memset(usage, 0, sizeof(*usage)); - acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); + acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, + sizeof(struct bch_dev_usage_full) / sizeof(u64)); } static u64 reserve_factor(u64 r) @@ -603,6 +604,13 @@ static int bch2_trigger_pointer(struct btree_trans *trans, } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); + if (!bucket_valid(ca, bucket.offset)) { + if (insert) { + bch2_dev_bucket_missing(ca, bucket.offset); + ret = -BCH_ERR_trigger_pointer; + } + goto err; + } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); @@ -1306,13 +1314,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if 
(resize) { - bucket_gens->nbuckets = min(bucket_gens->nbuckets, - old_bucket_gens->nbuckets); - bucket_gens->nbuckets_minus_first = - bucket_gens->nbuckets - bucket_gens->first_bucket; + u64 copy = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); memcpy(bucket_gens->b, old_bucket_gens->b, - bucket_gens->nbuckets); + sizeof(bucket_gens->b[0]) * copy); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 1c38b165f48b09..af1532de4a3747 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -44,6 +44,7 @@ static inline void bucket_unlock(struct bucket *b) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &b->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &b->lock, BUCKET_LOCK_BITNR); } @@ -242,11 +243,6 @@ static inline u64 dev_buckets_available(struct bch_dev *ca, /* Filesystem usage: */ -static inline unsigned dev_usage_u64s(void) -{ - return sizeof(struct bch_dev_usage) / sizeof(u64); -} - struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *); diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index 3726689093e301..d0a34a097b809e 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -7,17 +7,12 @@ #include "super-io.h" #include -#include #include #include #include #include -#include -#include #include -#include #include -#include #include /* @@ -96,116 +91,40 @@ static void bch2_checksum_update(struct bch2_checksum_state *state, const void * } } -static inline int do_encrypt_sg(struct crypto_sync_skcipher *tfm, - struct nonce nonce, - struct scatterlist *sg, size_t len) +static void bch2_chacha20_init(u32 state[CHACHA_STATE_WORDS], + const struct bch_key *key, struct nonce nonce) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); + u32 key_words[CHACHA_KEY_SIZE / sizeof(u32)]; - skcipher_request_set_sync_tfm(req, tfm); - skcipher_request_set_callback(req, 0, NULL, 
NULL); - skcipher_request_set_crypt(req, sg, sg, len, nonce.d); + BUILD_BUG_ON(sizeof(key_words) != sizeof(*key)); + memcpy(key_words, key, sizeof(key_words)); + le32_to_cpu_array(key_words, ARRAY_SIZE(key_words)); - int ret = crypto_skcipher_encrypt(req); - if (ret) - pr_err("got error %i from crypto_skcipher_encrypt()", ret); - - return ret; -} - -static inline int do_encrypt(struct crypto_sync_skcipher *tfm, - struct nonce nonce, - void *buf, size_t len) -{ - if (!is_vmalloc_addr(buf)) { - struct scatterlist sg = {}; - - sg_mark_end(&sg); - sg_set_page(&sg, virt_to_page(buf), len, offset_in_page(buf)); - return do_encrypt_sg(tfm, nonce, &sg, len); - } else { - DARRAY_PREALLOCATED(struct scatterlist, 4) sgl; - size_t sgl_len = 0; - int ret; - - darray_init(&sgl); - - while (len) { - unsigned offset = offset_in_page(buf); - struct scatterlist sg = { - .page_link = (unsigned long) vmalloc_to_page(buf), - .offset = offset, - .length = min(len, PAGE_SIZE - offset), - }; + BUILD_BUG_ON(sizeof(nonce) != CHACHA_IV_SIZE); + chacha_init(state, key_words, (const u8 *)nonce.d); - if (darray_push(&sgl, sg)) { - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len); - if (ret) - goto err; - - nonce = nonce_add(nonce, sgl_len); - sgl_len = 0; - sgl.nr = 0; - BUG_ON(darray_push(&sgl, sg)); - } - - buf += sg.length; - len -= sg.length; - sgl_len += sg.length; - } - - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(tfm, nonce, sgl.data, sgl_len); -err: - darray_exit(&sgl); - return ret; - } + memzero_explicit(key_words, sizeof(key_words)); } -int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce, - void *buf, size_t len) +static void bch2_chacha20(const struct bch_key *key, struct nonce nonce, + void *data, size_t len) { - struct crypto_sync_skcipher *chacha20 = - crypto_alloc_sync_skcipher("chacha20", 0, 0); - int ret; - - ret = PTR_ERR_OR_ZERO(chacha20); - if (ret) { - pr_err("error requesting chacha20 cipher: %s", 
bch2_err_str(ret)); - return ret; - } - - ret = crypto_skcipher_setkey(&chacha20->base, - (void *) key, sizeof(*key)); - if (ret) { - pr_err("error from crypto_skcipher_setkey(): %s", bch2_err_str(ret)); - goto err; - } + u32 state[CHACHA_STATE_WORDS]; - ret = do_encrypt(chacha20, nonce, buf, len); -err: - crypto_free_sync_skcipher(chacha20); - return ret; + bch2_chacha20_init(state, key, nonce); + chacha20_crypt(state, data, data, len); + memzero_explicit(state, sizeof(state)); } -static int gen_poly_key(struct bch_fs *c, struct shash_desc *desc, - struct nonce nonce) +static void bch2_poly1305_init(struct poly1305_desc_ctx *desc, + struct bch_fs *c, struct nonce nonce) { - u8 key[POLY1305_KEY_SIZE]; - int ret; + u8 key[POLY1305_KEY_SIZE] = { 0 }; nonce.d[3] ^= BCH_NONCE_POLY; - memset(key, 0, sizeof(key)); - ret = do_encrypt(c->chacha20, nonce, key, sizeof(key)); - if (ret) - return ret; - - desc->tfm = c->poly1305; - crypto_shash_init(desc); - crypto_shash_update(desc, key, sizeof(key)); - return 0; + bch2_chacha20(&c->chacha20_key, nonce, key, sizeof(key)); + poly1305_init(desc, key); } struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, @@ -230,14 +149,13 @@ struct bch_csum bch2_checksum(struct bch_fs *c, unsigned type, case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: { - SHASH_DESC_ON_STACK(desc, c->poly1305); + struct poly1305_desc_ctx dctx; u8 digest[POLY1305_DIGEST_SIZE]; struct bch_csum ret = { 0 }; - gen_poly_key(c, desc, nonce); - - crypto_shash_update(desc, data, len); - crypto_shash_final(desc, digest); + bch2_poly1305_init(&dctx, c, nonce); + poly1305_update(&dctx, data, len); + poly1305_final(&dctx, digest); memcpy(&ret, digest, bch_crc_bytes[type]); return ret; @@ -253,11 +171,12 @@ int bch2_encrypt(struct bch_fs *c, unsigned type, if (!bch2_csum_type_is_encryption(type)) return 0; - if (bch2_fs_inconsistent_on(!c->chacha20, + if (bch2_fs_inconsistent_on(!c->chacha20_key_set, c, "attempting to encrypt without 
encryption key")) return -BCH_ERR_no_encryption_key; - return do_encrypt(c->chacha20, nonce, data, len); + bch2_chacha20(&c->chacha20_key, nonce, data, len); + return 0; } static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, @@ -296,26 +215,26 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type, case BCH_CSUM_chacha20_poly1305_80: case BCH_CSUM_chacha20_poly1305_128: { - SHASH_DESC_ON_STACK(desc, c->poly1305); + struct poly1305_desc_ctx dctx; u8 digest[POLY1305_DIGEST_SIZE]; struct bch_csum ret = { 0 }; - gen_poly_key(c, desc, nonce); + bch2_poly1305_init(&dctx, c, nonce); #ifdef CONFIG_HIGHMEM __bio_for_each_segment(bv, bio, *iter, *iter) { void *p = kmap_local_page(bv.bv_page) + bv.bv_offset; - crypto_shash_update(desc, p, bv.bv_len); + poly1305_update(&dctx, p, bv.bv_len); kunmap_local(p); } #else __bio_for_each_bvec(bv, bio, *iter, *iter) - crypto_shash_update(desc, + poly1305_update(&dctx, page_address(bv.bv_page) + bv.bv_offset, bv.bv_len); #endif - crypto_shash_final(desc, digest); + poly1305_final(&dctx, digest); memcpy(&ret, digest, bch_crc_bytes[type]); return ret; @@ -338,43 +257,33 @@ int __bch2_encrypt_bio(struct bch_fs *c, unsigned type, { struct bio_vec bv; struct bvec_iter iter; - DARRAY_PREALLOCATED(struct scatterlist, 4) sgl; - size_t sgl_len = 0; + u32 chacha_state[CHACHA_STATE_WORDS]; int ret = 0; - if (bch2_fs_inconsistent_on(!c->chacha20, + if (bch2_fs_inconsistent_on(!c->chacha20_key_set, c, "attempting to encrypt without encryption key")) return -BCH_ERR_no_encryption_key; - darray_init(&sgl); + bch2_chacha20_init(chacha_state, &c->chacha20_key, nonce); bio_for_each_segment(bv, bio, iter) { - struct scatterlist sg = { - .page_link = (unsigned long) bv.bv_page, - .offset = bv.bv_offset, - .length = bv.bv_len, - }; - - if (darray_push(&sgl, sg)) { - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len); - if (ret) - goto err; - - nonce = nonce_add(nonce, 
sgl_len); - sgl_len = 0; - sgl.nr = 0; - - BUG_ON(darray_push(&sgl, sg)); + void *p; + + /* + * chacha_crypt() assumes that the length is a multiple of + * CHACHA_BLOCK_SIZE on any non-final call. + */ + if (!IS_ALIGNED(bv.bv_len, CHACHA_BLOCK_SIZE)) { + bch_err_ratelimited(c, "bio not aligned for encryption"); + ret = -EIO; + break; } - sgl_len += sg.length; + p = bvec_kmap_local(&bv); + chacha20_crypt(chacha_state, p, p, bv.bv_len); + kunmap_local(p); } - - sg_mark_end(&darray_last(sgl)); - ret = do_encrypt_sg(c->chacha20, nonce, sgl.data, sgl_len); -err: - darray_exit(&sgl); + memzero_explicit(chacha_state, sizeof(chacha_state)); return ret; } @@ -650,10 +559,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c, } /* decrypt real key: */ - ret = bch2_chacha_encrypt_key(&user_key, bch2_sb_key_nonce(c), - &sb_key, sizeof(sb_key)); - if (ret) - goto err; + bch2_chacha20(&user_key, bch2_sb_key_nonce(c), &sb_key, sizeof(sb_key)); if (bch2_key_is_encrypted(&sb_key)) { bch_err(c, "incorrect encryption key"); @@ -668,31 +574,6 @@ int bch2_decrypt_sb_key(struct bch_fs *c, return ret; } -static int bch2_alloc_ciphers(struct bch_fs *c) -{ - if (c->chacha20) - return 0; - - struct crypto_sync_skcipher *chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0); - int ret = PTR_ERR_OR_ZERO(chacha20); - if (ret) { - bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret)); - return ret; - } - - struct crypto_shash *poly1305 = crypto_alloc_shash("poly1305", 0, 0); - ret = PTR_ERR_OR_ZERO(poly1305); - if (ret) { - bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret)); - crypto_free_sync_skcipher(chacha20); - return ret; - } - - c->chacha20 = chacha20; - c->poly1305 = poly1305; - return 0; -} - #if 0 /* @@ -797,35 +678,21 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) void bch2_fs_encryption_exit(struct bch_fs *c) { - if (c->poly1305) - crypto_free_shash(c->poly1305); - if (c->chacha20) - crypto_free_sync_skcipher(c->chacha20); + 
memzero_explicit(&c->chacha20_key, sizeof(c->chacha20_key)); } int bch2_fs_encryption_init(struct bch_fs *c) { struct bch_sb_field_crypt *crypt; - struct bch_key key; - int ret = 0; + int ret; crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); if (!crypt) - goto out; + return 0; - ret = bch2_alloc_ciphers(c); + ret = bch2_decrypt_sb_key(c, crypt, &c->chacha20_key); if (ret) - goto out; - - ret = bch2_decrypt_sb_key(c, crypt, &key); - if (ret) - goto out; - - ret = crypto_skcipher_setkey(&c->chacha20->base, - (void *) &key.key, sizeof(key.key)); - if (ret) - goto out; -out: - memzero_explicit(&key, sizeof(key)); - return ret; + return ret; + c->chacha20_key_set = true; + return 0; } diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h index 4ac251c8fcd839..1310782d3ae930 100644 --- a/fs/bcachefs/checksum.h +++ b/fs/bcachefs/checksum.h @@ -69,7 +69,6 @@ static inline void bch2_csum_err_msg(struct printbuf *out, bch2_csum_to_text(out, type, expected); } -int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t); int bch2_request_key(struct bch_sb *, struct bch_key *); #ifndef __KERNEL__ int bch2_revoke_key(struct bch_sb *); @@ -156,7 +155,7 @@ static inline bool bch2_checksum_type_valid(const struct bch_fs *c, if (type >= BCH_CSUM_NR) return false; - if (bch2_csum_type_is_encryption(type) && !c->chacha20) + if (bch2_csum_type_is_encryption(type) && !c->chacha20_key_set) return false; return true; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index de02ebf847ec94..b211c97238aba9 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -607,7 +607,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update prt_newline(out); printbuf_indent_add(out, 2); bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); - prt_printf(out, "read_done:\t\%u\n", m->read_done); + prt_printf(out, "read_done:\t%u\n", m->read_done); bch2_write_op_to_text(out, &m->op); 
printbuf_indent_sub(out, 2); } diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index bf53a029f3562c..901230ca4a750e 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,8 +13,8 @@ #include -static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) +int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) { *out_cf = (struct qstr) QSTR_INIT(NULL, 0); @@ -35,18 +35,6 @@ static int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info * #endif } -static inline int bch2_maybe_casefold(struct btree_trans *trans, - const struct bch_hash_info *info, - const struct qstr *str, struct qstr *out_cf) -{ - if (likely(!info->cf_encoding)) { - *out_cf = *str; - return 0; - } else { - return bch2_casefold(trans, info, str, out_cf); - } -} - static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { if (bkey_val_bytes(d.k) < offsetof(struct bch_dirent, d_name)) @@ -287,8 +275,8 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent, EBUG_ON(!dirent->v.d_casefold); EBUG_ON(!cf_name->len); - dirent->v.d_cf_name_block.d_name_len = name->len; - dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len; + dirent->v.d_cf_name_block.d_name_len = cpu_to_le16(name->len); + dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_name->len); memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len); memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0, @@ -300,6 +288,7 @@ static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent, } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, + const struct bch_hash_info *hash_info, subvol_inum dir, u8 type, const struct qstr *name, @@ -307,10 +296,19 @@ static struct bkey_i_dirent *dirent_create_key(struct 
btree_trans *trans, u64 dst) { struct bkey_i_dirent *dirent; + struct qstr _cf_name; if (name->len > BCH_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); + if (hash_info->cf_encoding && !cf_name) { + int ret = bch2_casefold(trans, hash_info, name, &_cf_name); + if (ret) + return ERR_PTR(ret); + + cf_name = &_cf_name; + } + dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst); if (IS_ERR(dirent)) return dirent; @@ -336,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, struct bkey_i_dirent *dirent; int ret; - dirent = dirent_create_key(trans, dir_inum, type, name, NULL, dst_inum); + dirent = dirent_create_key(trans, hash_info, dir_inum, type, name, NULL, dst_inum); ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; @@ -345,8 +343,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, dirent->k.p.snapshot = snapshot; ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, - dir_inum, snapshot, &dirent->k_i, - flags|BTREE_UPDATE_internal_snapshot_node); + dir_inum, snapshot, &dirent->k_i, flags); *dir_offset = dirent->k.p.offset; return ret; @@ -356,28 +353,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, const struct bch_hash_info *hash_info, u8 type, const struct qstr *name, u64 dst_inum, u64 *dir_offset, - u64 *i_size, enum btree_iter_update_trigger_flags flags) { struct bkey_i_dirent *dirent; int ret; - if (hash_info->cf_encoding) { - struct qstr cf_name; - ret = bch2_casefold(trans, hash_info, name, &cf_name); - if (ret) - return ret; - dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum); - } else { - dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum); - } - + dirent = dirent_create_key(trans, hash_info, dir, type, name, NULL, dst_inum); ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; - *i_size += bkey_bytes(&dirent->k); - ret = bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info, dir, &dirent->k_i, flags); *dir_offset = 
dirent->k.p.offset; @@ -410,8 +395,8 @@ int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir, } int bch2_dirent_rename(struct btree_trans *trans, - subvol_inum src_dir, struct bch_hash_info *src_hash, u64 *src_dir_i_size, - subvol_inum dst_dir, struct bch_hash_info *dst_hash, u64 *dst_dir_i_size, + subvol_inum src_dir, struct bch_hash_info *src_hash, + subvol_inum dst_dir, struct bch_hash_info *dst_hash, const struct qstr *src_name, subvol_inum *src_inum, u64 *src_offset, const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, enum bch_rename_mode mode) @@ -478,7 +463,7 @@ int bch2_dirent_rename(struct btree_trans *trans, *src_offset = dst_iter.pos.offset; /* Create new dst key: */ - new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, + new_dst = dirent_create_key(trans, dst_hash, dst_dir, 0, dst_name, dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_dst); if (ret) @@ -489,7 +474,7 @@ int bch2_dirent_rename(struct btree_trans *trans, /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { - new_src = dirent_create_key(trans, src_dir, 0, src_name, + new_src = dirent_create_key(trans, src_hash, src_dir, 0, src_name, src_hash->cf_encoding ? 
&src_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_src); if (ret) @@ -550,14 +535,6 @@ int bch2_dirent_rename(struct btree_trans *trans, new_src->v.d_type == DT_SUBVOL) new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol); - if (old_dst.k) - *dst_dir_i_size -= bkey_bytes(old_dst.k); - *src_dir_i_size -= bkey_bytes(old_src.k); - - if (mode == BCH_RENAME_EXCHANGE) - *src_dir_i_size += bkey_bytes(&new_src->k); - *dst_dir_i_size += bkey_bytes(&new_dst->k); - ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0); if (ret) goto out; @@ -697,7 +674,7 @@ static int bch2_dir_emit(struct dir_context *ctx, struct bkey_s_c_dirent d, subv vfs_d_type(d.v->d_type)); if (ret) ctx->pos = d.k->p.offset + 1; - return ret; + return !ret; } int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) @@ -722,7 +699,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx) if (ret2 > 0) continue; - ret2 ?: drop_locks_do(trans, bch2_dir_emit(ctx, dirent, target)); + ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); }))); bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 0880772b80a9b1..999b895fa28a86 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,6 +23,21 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, + const struct qstr *, struct qstr *); + +static inline int bch2_maybe_casefold(struct btree_trans *trans, + const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + if (likely(!info->cf_encoding)) { + *out_cf = *str; + return 0; + } else { + return bch2_casefold(trans, info, str, out_cf); + } +} + struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d); static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) @@ -50,7 +65,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32, enum 
btree_iter_update_trigger_flags); int bch2_dirent_create(struct btree_trans *, subvol_inum, const struct bch_hash_info *, u8, - const struct qstr *, u64, u64 *, u64 *, + const struct qstr *, u64, u64 *, enum btree_iter_update_trigger_flags); static inline unsigned vfs_d_type(unsigned type) @@ -65,8 +80,8 @@ enum bch_rename_mode { }; int bch2_dirent_rename(struct btree_trans *, - subvol_inum, struct bch_hash_info *, u64 *, - subvol_inum, struct bch_hash_info *, u64 *, + subvol_inum, struct bch_hash_info *, + subvol_inum, struct bch_hash_info *, const struct qstr *, subvol_inum *, u64 *, const struct qstr *, subvol_inum *, u64 *, enum bch_rename_mode); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index b007319b72e91c..1f0422bfae359f 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -376,6 +376,19 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, return ret; } +int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a, + enum bch_accounting_mode mode) +{ + struct bch_replicas_padded r; + + if (mode != BCH_ACCOUNTING_read && + accounting_to_replicas(&r.e, a.k->p) && + !bch2_replicas_marked_locked(c, &r.e)) + return -BCH_ERR_btree_insert_need_mark_replicas; + + return __bch2_accounting_mem_insert(c, a); +} + static bool accounting_mem_entry_is_zero(struct accounting_mem_entry *e) { for (unsigned i = 0; i < e->nr_counters; i++) @@ -583,7 +596,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) accounting_key_init(&k_i.k, &acc_k, src_v, nr); bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), - BCH_ACCOUNTING_normal); + BCH_ACCOUNTING_normal, true); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -612,7 +625,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) percpu_down_read(&c->mark_lock); int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), - 
BCH_ACCOUNTING_read); + BCH_ACCOUNTING_read, false); percpu_up_read(&c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index abb1f6206fe928..d557b99b3c0ae6 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -136,6 +136,7 @@ enum bch_accounting_mode { }; int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); +int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); void bch2_accounting_mem_gc(struct bch_fs *); static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) @@ -150,7 +151,8 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos acc) */ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, - enum bch_accounting_mode mode) + enum bch_accounting_mode mode, + bool write_locked) { struct bch_fs *c = trans->c; struct bch_accounting_mem *acc = &c->accounting; @@ -189,7 +191,11 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, mode); + int ret = 0; + if (unlikely(write_locked)) + ret = bch2_accounting_mem_insert_locked(c, a, mode); + else + ret = bch2_accounting_mem_insert(c, a, mode); if (ret) return ret; } @@ -206,7 +212,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? 
BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); percpu_up_read(&trans->c->mark_lock); return ret; } @@ -259,7 +265,7 @@ static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, EBUG_ON(bversion_zero(a->k.bversion)); return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) - ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) + ? bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal, false) : 0; } @@ -271,7 +277,7 @@ static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans struct bkey_s_accounting a = accounting_i_to_s(a_i); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); + bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal, false); bch2_accounting_neg(a); } } diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 1186280b29e903..2ca3cbf12b7135 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -470,23 +470,22 @@ void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - struct bch_member *mi; - int ret, v = -1; + lockdep_assert_held(&c->sb_lock); - if (!strlen(name) || !strcmp(name, "none")) - return 0; - v = bch2_disk_path_find_or_create(&c->disk_sb, name); - if (v < 0) - return v; + if (!strlen(name) || !strcmp(name, "none")) { + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, 0); + } else { + int v = bch2_disk_path_find_or_create(&c->disk_sb, name); + if (v < 0) + return v; - ret = bch2_sb_disk_groups_to_cpu(c); - if (ret) - return ret; + struct bch_member *mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_GROUP(mi, v + 1); + } - mi = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_GROUP(mi, v + 1); - return 0; + return 
bch2_sb_disk_groups_to_cpu(c); } int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index a396865e8b176b..c6cb26981923dd 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -507,20 +507,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s, static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) { - switch (k.k->type) { - case KEY_TYPE_extent: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - - extent_for_each_entry(e, entry) - if (extent_entry_type(entry) == - BCH_EXTENT_ENTRY_stripe_ptr && - entry->stripe_ptr.idx == idx) - return true; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; - break; - } - } + bkey_extent_entry_for_each(ptrs, entry) + if (extent_entry_type(entry) == + BCH_EXTENT_ENTRY_stripe_ptr && + entry->stripe_ptr.idx == idx) + return true; return false; } @@ -2204,10 +2198,10 @@ void bch2_fs_ec_stop(struct bch_fs *c) static bool bch2_fs_ec_flush_done(struct bch_fs *c) { - bool ret; + sched_annotate_sleep(); mutex_lock(&c->ec_stripe_new_lock); - ret = list_empty(&c->ec_stripe_new_list); + bool ret = list_empty(&c->ec_stripe_new_list); mutex_unlock(&c->ec_stripe_new_lock); return ret; diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 62d27e04d763f5..51893e1ee874f0 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -160,6 +160,7 @@ static inline void gc_stripe_unlock(struct gc_stripe *s) BUILD_BUG_ON(!((union ulong_byte_assert) { .ulong = 1UL << BUCKET_LOCK_BITNR }).byte); clear_bit_unlock(BUCKET_LOCK_BITNR, (void *) &s->lock); + smp_mb__after_atomic(); wake_up_bit((void *) &s->lock, BUCKET_LOCK_BITNR); } diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index c8696f01eb14f6..346766299cb34a 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -209,6 +209,8 @@ x(EINVAL, remove_would_lose_data) \ x(EINVAL, 
no_resize_with_buckets_nouse) \ x(EINVAL, inode_unpack_error) \ + x(EINVAL, inode_not_unlinked) \ + x(EINVAL, inode_has_child_snapshot) \ x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ @@ -269,7 +271,7 @@ x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ - x(EIO, journal_shutdown) \ + x(EROFS, journal_shutdown) \ x(EIO, journal_flush_err) \ x(EIO, journal_write_err) \ x(EIO, btree_node_read_err) \ @@ -287,7 +289,7 @@ x(EIO, mark_stripe) \ x(EIO, stripe_reconstruct) \ x(EIO, key_type_error) \ - x(EIO, extent_poisened) \ + x(EIO, extent_poisoned) \ x(EIO, missing_indirect_extent) \ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index baf5dfb3229843..6b8695b1349c96 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -272,9 +272,6 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, { struct fsck_err_state *s; - if (!test_bit(BCH_FS_fsck_running, &c->flags)) - return NULL; - list_for_each_entry(s, &c->fsck_error_msgs, list) if (s->id == id) { /* @@ -481,7 +478,9 @@ int __bch2_fsck_err(struct bch_fs *c, } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { if (c->opts.errors != BCH_ON_ERROR_continue || !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { - prt_str(out, ", shutting down"); + prt_str_indented(out, ", shutting down\n" + "error not marked as autofix and not in fsck\n" + "run fsck, and forward to devs so error can be marked for self-healing"); inconsistent = true; print = true; ret = -BCH_ERR_fsck_errors_not_fixed; @@ -639,14 +638,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return ret; } -void bch2_flush_fsck_errs(struct bch_fs *c) +static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; mutex_lock(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, 
&c->fsck_error_msgs, list) { - if (s->ratelimited && s->last_msg) + if (print && s->ratelimited && s->last_msg) bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); list_del(&s->list); @@ -657,6 +656,16 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_unlock(&c->fsck_error_msgs_lock); } +void bch2_flush_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, true); +} + +void bch2_free_fsck_errs(struct bch_fs *c) +{ + __bch2_flush_fsck_errs(c, false); +} + int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum, u64 offset) { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index d0d024dc714b11..4a364fd44abe0e 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -93,6 +93,7 @@ int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, _flags, BCH_FSCK_ERR_##_err_type, __VA_ARGS__) void bch2_flush_fsck_errs(struct bch_fs *); +void bch2_free_fsck_errs(struct bch_fs *); #define fsck_err_wrap(_do) \ ({ \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ae7c7a177e10b2..e597fb9c98236c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -139,7 +139,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); if (bch2_bkey_extent_ptrs_flags(ptrs) & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) - return -BCH_ERR_extent_poisened; + return -BCH_ERR_extent_poisoned; rcu_read_lock(); const union bch_extent_entry *entry; @@ -1056,8 +1056,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, struct bch_extent_ptr *ptr) { - if (!opts->promote_target || - !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + unsigned target = opts->promote_target ?: opts->foreground_target; + + if (target && !bch2_dev_in_target(c, ptr->dev, target)) return false; struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); diff --git 
a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index e78a39e7e18f9a..9fe153183b36ec 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -380,13 +380,6 @@ out: \ /* Iterate over pointers in KEY_TYPE_extent: */ -#define extent_for_each_entry_from(_e, _entry, _start) \ - __bkey_extent_entry_for_each_from(_start, \ - extent_entry_last(_e), _entry) - -#define extent_for_each_entry(_e, _entry) \ - extent_for_each_entry_from(_e, _entry, (_e).v->start) - #define extent_ptr_next(_e, _ptr) \ __bkey_ptr_next(_ptr, extent_entry_last(_e)) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 19d4599918dc59..e3a75dcca60c81 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -225,11 +225,26 @@ static void bchfs_read(struct btree_trans *trans, bch2_read_extent(trans, rbio, iter.pos, data_btree, k, offset_into_extent, flags); - swap(rbio->bio.bi_iter.bi_size, bytes); + /* + * Careful there's a landmine here if bch2_read_extent() ever + * starts returning transaction restarts here. + * + * We've changed rbio->bi_iter.bi_size to be "bytes we can read + * from this extent" with the swap call, and we restore it + * below. That restore needs to come before checking for + * errors. + * + * But unlike __bch2_read(), we use the rbio bvec iter, not one + * on the stack, so we can't do the restore right after the + * bch2_read_extent() call: we don't own that iterator anymore + * if BCH_READ_last_fragment is set, since we may have submitted + * that rbio instead of cloning it. 
+ */ if (flags & BCH_READ_last_fragment) break; + swap(rbio->bio.bi_iter.bi_size, bytes); bio_advance(&rbio->bio, bytes); err: if (ret && diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index e072900e6a5b9e..fbae9c1de746a1 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -605,10 +605,14 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) struct address_space *mapping = file->f_mapping; struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch2_folio_reservation res; - unsigned len; - loff_t isize; vm_fault_t ret; + loff_t file_offset = round_down(vmf->pgoff << PAGE_SHIFT, block_bytes(c)); + unsigned offset = file_offset - folio_pos(folio); + unsigned len = max(PAGE_SIZE, block_bytes(c)); + + BUG_ON(offset + len > folio_size(folio)); + bch2_folio_reservation_init(c, inode, &res); sb_start_pagefault(inode->v.i_sb); @@ -623,24 +627,24 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) bch2_pagecache_add_get(inode); folio_lock(folio); - isize = i_size_read(&inode->v); + u64 isize = i_size_read(&inode->v); - if (folio->mapping != mapping || folio_pos(folio) >= isize) { + if (folio->mapping != mapping || file_offset >= isize) { folio_unlock(folio); ret = VM_FAULT_NOPAGE; goto out; } - len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio)); + len = min_t(unsigned, len, isize - file_offset); if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?: - bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) { + bch2_folio_reservation_get(c, inode, folio, &res, offset, len)) { folio_unlock(folio); ret = VM_FAULT_SIGBUS; goto out; } - bch2_set_folio_dirty(c, inode, folio, &res, 0, len); + bch2_set_folio_dirty(c, inode, folio, &res, offset, len); bch2_folio_reservation_put(c, inode, &res); folio_wait_stable(folio); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 65c2c33d253dff..9657144666b8b2 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -144,10 +144,25 @@ int __must_check 
bch2_write_inode_size(struct bch_fs *c, void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { - bch2_fs_inconsistent_on((s64) inode->v.i_blocks + sectors < 0, c, - "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, sectors, - inode->ei_inode.bi_sectors); + if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, sectors, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, buf.buf, &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (sectors < 0) + sectors = -inode->v.i_blocks; + else + sectors = 0; + } + inode->v.i_blocks += sectors; #ifdef CONFIG_BCACHEFS_QUOTA @@ -502,11 +517,22 @@ int bchfs_truncate(struct mnt_idmap *idmap, goto err; } - bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks && - !bch2_journal_error(&c->journal), c, - "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", - inode->v.i_ino, (u64) inode->v.i_blocks, - inode->ei_inode.bi_sectors); + if (unlikely(!inode->v.i_size && inode->v.i_blocks && + !bch2_journal_error(&c->journal))) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", + inode->v.i_ino, (u64) inode->v.i_blocks, + inode->ei_inode.bi_sectors); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + } ret = bch2_setattr_nonsize(idmap, inode, iattr); err: diff --git a/fs/bcachefs/fs-ioctl.c 
b/fs/bcachefs/fs-ioctl.c index c1553e44e049a3..a82dfce9e4ad37 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -21,206 +21,6 @@ #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ -struct flags_set { - unsigned mask; - unsigned flags; - - unsigned projid; - - bool set_projinherit; - bool projinherit; -}; - -static int bch2_inode_flags_set(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct bch_fs *c = inode->v.i_sb->s_fs_info; - /* - * We're relying on btree locking here for exclusion with other ioctl - * calls - use the flags in the btree (@bi), not inode->i_flags: - */ - struct flags_set *s = p; - unsigned newflags = s->flags; - unsigned oldflags = bi->bi_flags & s->mask; - - if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - - if (!S_ISREG(bi->bi_mode) && - !S_ISDIR(bi->bi_mode) && - (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) - return -EINVAL; - - if ((newflags ^ oldflags) & BCH_INODE_casefolded) { -#ifdef CONFIG_UNICODE - int ret = 0; - /* Not supported on individual files. */ - if (!S_ISDIR(bi->bi_mode)) - return -EOPNOTSUPP; - - /* - * Make sure the dir is empty, as otherwise we'd need to - * rehash everything and update the dirent keys. 
- */ - ret = bch2_empty_dir_trans(trans, inode_inum(inode)); - if (ret < 0) - return ret; - - ret = bch2_request_incompat_feature(c,bcachefs_metadata_version_casefolding); - if (ret) - return ret; - - bch2_check_set_feature(c, BCH_FEATURE_casefolding); -#else - printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); - return -EOPNOTSUPP; -#endif - } - - if (s->set_projinherit) { - bi->bi_fields_set &= ~(1 << Inode_opt_project); - bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); - } - - bi->bi_flags &= ~s->mask; - bi->bi_flags |= newflags; - - bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); - return 0; -} - -static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) -{ - unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); - - return put_user(flags, arg); -} - -static int bch2_ioc_setflags(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - void __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; - unsigned uflags; - int ret; - - if (get_user(uflags, (int __user *) arg)) - return -EFAULT; - - s.flags = map_flags_rev(bch_flags_to_uflags, uflags); - if (uflags) - return -EOPNOTSUPP; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto setflags_out; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_write_inode(c, inode, bch2_inode_flags_set, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); - -setflags_out: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - -static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct fsxattr fa = { 0 }; - - fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); - - if (inode->ei_inode.bi_fields_set & (1 << 
Inode_opt_project)) - fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; - - fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - - return 0; -} - -static int fssetxattr_inode_update_fn(struct btree_trans *trans, - struct bch_inode_info *inode, - struct bch_inode_unpacked *bi, - void *p) -{ - struct flags_set *s = p; - - if (s->projid != bi->bi_project) { - bi->bi_fields_set |= 1U << Inode_opt_project; - bi->bi_project = s->projid; - } - - return bch2_inode_flags_set(trans, inode, bi, p); -} - -static int bch2_ioc_fssetxattr(struct bch_fs *c, - struct file *file, - struct bch_inode_info *inode, - struct fsxattr __user *arg) -{ - struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; - struct fsxattr fa; - int ret; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - s.set_projinherit = true; - s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; - fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; - - s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); - if (fa.fsx_xflags) - return -EOPNOTSUPP; - - if (fa.fsx_projid >= U32_MAX) - return -EINVAL; - - /* - * inode fields accessible via the xattr interface are stored with a +1 - * bias, so that 0 means unset: - */ - s.projid = fa.fsx_projid + 1; - - ret = mnt_want_write_file(file); - if (ret) - return ret; - - inode_lock(&inode->v); - if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { - ret = -EACCES; - goto err; - } - - mutex_lock(&inode->ei_update_lock); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - bch2_set_projid(c, inode, fa.fsx_projid) ?: - bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, - ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); -err: - inode_unlock(&inode->v); - mnt_drop_write_file(file); - return ret; -} - static int bch2_reinherit_attrs_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -558,23 +358,6 @@ long bch2_fs_file_ioctl(struct file *file, 
unsigned cmd, unsigned long arg) long ret; switch (cmd) { - case FS_IOC_GETFLAGS: - ret = bch2_ioc_getflags(inode, (int __user *) arg); - break; - - case FS_IOC_SETFLAGS: - ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); - break; - - case FS_IOC_FSGETXATTR: - ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); - break; - - case FS_IOC_FSSETXATTR: - ret = bch2_ioc_fssetxattr(c, file, inode, - (void __user *) arg); - break; - case BCHFS_IOC_REINHERIT_ATTRS: ret = bch2_ioc_reinherit_attrs(c, file, inode, (void __user *) arg); diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index ecd3bfdcde2183..a657e4994b7153 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -2,81 +2,6 @@ #ifndef _BCACHEFS_FS_IOCTL_H #define _BCACHEFS_FS_IOCTL_H -/* Inode flags: */ - -/* bcachefs inode flags -> vfs inode flags: */ -static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_sync] = S_SYNC, - [__BCH_INODE_immutable] = S_IMMUTABLE, - [__BCH_INODE_append] = S_APPEND, - [__BCH_INODE_noatime] = S_NOATIME, - [__BCH_INODE_casefolded] = S_CASEFOLD, -}; - -/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ -static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_sync] = FS_SYNC_FL, - [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, - [__BCH_INODE_append] = FS_APPEND_FL, - [__BCH_INODE_nodump] = FS_NODUMP_FL, - [__BCH_INODE_noatime] = FS_NOATIME_FL, - [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, -}; - -/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ -static const __maybe_unused unsigned bch_flags_to_xflags[] = { - [__BCH_INODE_sync] = FS_XFLAG_SYNC, - [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, - [__BCH_INODE_append] = FS_XFLAG_APPEND, - [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, - [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, - //[__BCH_INODE_PROJINHERIT] = FS_XFLAG_PROJINHERIT; -}; - -#define set_flags(_map, _in, _out) \ -do { \ - unsigned _i; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & (1 << _i)) \ - 
(_out) |= _map[_i]; \ - else \ - (_out) &= ~_map[_i]; \ -} while (0) - -#define map_flags(_map, _in) \ -({ \ - unsigned _out = 0; \ - \ - set_flags(_map, _in, _out); \ - _out; \ -}) - -#define map_flags_rev(_map, _in) \ -({ \ - unsigned _i, _out = 0; \ - \ - for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ - if ((_in) & _map[_i]) { \ - (_out) |= 1 << _i; \ - (_in) &= ~_map[_i]; \ - } \ - (_out); \ -}) - -#define map_defined(_map) \ -({ \ - unsigned _in = ~0; \ - \ - map_flags_rev(_map, _in); \ -}) - -/* Set VFS inode flags from bcachefs inode: */ -static inline void bch2_inode_flags_to_vfs(struct bch_inode_info *inode) -{ - set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); -} - long bch2_fs_file_ioctl(struct file *, unsigned, unsigned long); long bch2_compat_fs_ioctl(struct file *, unsigned, unsigned long); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 5a41b1a8e54fd8..8a47ce3467e8d2 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,24 @@ static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, struct bch_subvolume *); +/* Set VFS inode flags from bcachefs inode: */ +static inline void bch2_inode_flags_to_vfs(struct bch_fs *c, struct bch_inode_info *inode) +{ + static const __maybe_unused unsigned bch_flags_to_vfs[] = { + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + }; + + set_flags(bch_flags_to_vfs, inode->ei_inode.bi_flags, inode->v.i_flags); + + if (bch2_inode_casefold(c, &inode->ei_inode)) + inode->v.i_flags |= S_CASEFOLD; + else + inode->v.i_flags &= ~S_CASEFOLD; +} + void bch2_inode_update_after_write(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, @@ -79,7 +98,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, inode->ei_inode = *bi; - 
bch2_inode_flags_to_vfs(inode); + bch2_inode_flags_to_vfs(c, inode); } int __must_check bch2_write_inode(struct bch_fs *c, @@ -631,13 +650,18 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, const struct qstr *name) { struct bch_fs *c = trans->c; - struct btree_iter dirent_iter = {}; subvol_inum inum = {}; struct printbuf buf = PRINTBUF; + struct qstr lookup_name; + int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); + if (ret) + return ERR_PTR(ret); + + struct btree_iter dirent_iter = {}; struct bkey_s_c k = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc, - dir_hash_info, dir, name, 0); - int ret = bkey_err(k); + dir_hash_info, dir, &lookup_name, 0); + ret = bkey_err(k); if (ret) return ERR_PTR(ret); @@ -825,6 +849,9 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, */ set_nlink(&inode->v, 0); } + + if (IS_CASEFOLDED(vdir)) + d_invalidate(dentry); err: bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); @@ -1235,10 +1262,20 @@ static int bch2_tmpfile(struct mnt_idmap *idmap, return finish_open_simple(file, 0); } +struct bch_fiemap_extent { + struct bkey_buf kbuf; + unsigned flags; +}; + static int bch2_fill_extent(struct bch_fs *c, struct fiemap_extent_info *info, - struct bkey_s_c k, unsigned flags) + struct bch_fiemap_extent *fe) { + struct bkey_s_c k = bkey_i_to_s_c(fe->kbuf.k); + unsigned flags = fe->flags; + + BUG_ON(!k.k->size); + if (bkey_extent_is_direct_data(k.k)) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1291,110 +1328,225 @@ static int bch2_fill_extent(struct bch_fs *c, } } -static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, - u64 start, u64 len) +/* + * Scan a range of an inode for data in pagecache. + * + * Intended to be retryable, so don't modify the output params until success is + * imminent. 
+ */ +static int +bch2_fiemap_hole_pagecache(struct inode *vinode, u64 *start, u64 *end, + bool nonblock) { - struct bch_fs *c = vinode->i_sb->s_fs_info; - struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_buf cur, prev; - bool have_extent = false; - int ret = 0; + loff_t dstart, dend; - ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); - if (ret) + dstart = bch2_seek_pagecache_data(vinode, *start, *end, 0, nonblock); + if (dstart < 0) + return dstart; + + if (dstart == *end) { + *start = dstart; + return 0; + } + + dend = bch2_seek_pagecache_hole(vinode, dstart, *end, 0, nonblock); + if (dend < 0) + return dend; + + /* race */ + BUG_ON(dstart == dend); + + *start = dstart; + *end = dend; + return 0; +} + +/* + * Scan a range of pagecache that corresponds to a file mapping hole in the + * extent btree. If data is found, fake up an extent key so it looks like a + * delalloc extent to the rest of the fiemap processing code. + */ +static int +bch2_next_fiemap_pagecache_extent(struct btree_trans *trans, struct bch_inode_info *inode, + u64 start, u64 end, struct bch_fiemap_extent *cur) +{ + struct bch_fs *c = trans->c; + struct bkey_i_extent *delextent; + struct bch_extent_ptr ptr = {}; + loff_t dstart = start << 9, dend = end << 9; + int ret; + + /* + * We hold btree locks here so we cannot block on folio locks without + * dropping trans locks first. Run a nonblocking scan for the common + * case of no folios over holes and fall back on failure. + * + * Note that dropping locks like this is technically racy against + * writeback inserting to the extent tree, but a non-sync fiemap scan is + * fundamentally racy with writeback anyways. Therefore, just report the + * range as delalloc regardless of whether we have to cycle trans locks. 
+ */ + ret = bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, true); + if (ret == -EAGAIN) + ret = drop_locks_do(trans, + bch2_fiemap_hole_pagecache(&inode->v, &dstart, &dend, false)); + if (ret < 0) return ret; - struct bpos end = POS(ei->v.i_ino, (start + len) >> 9); - if (start + len < start) - return -EINVAL; + /* + * Create a fake extent key in the buffer. We have to add a dummy extent + * pointer for the fill code to add an extent entry. It's explicitly + * zeroed to reflect delayed allocation (i.e. phys offset 0). + */ + bch2_bkey_buf_realloc(&cur->kbuf, c, sizeof(*delextent) / sizeof(u64)); + delextent = bkey_extent_init(cur->kbuf.k); + delextent->k.p = POS(inode->ei_inum.inum, dend >> 9); + delextent->k.size = (dend - dstart) >> 9; + bch2_bkey_append_ptr(&delextent->k_i, ptr); - start >>= 9; + cur->flags = FIEMAP_EXTENT_DELALLOC; - bch2_bkey_buf_init(&cur); - bch2_bkey_buf_init(&prev); - trans = bch2_trans_get(c); + return 0; +} + +static int bch2_next_fiemap_extent(struct btree_trans *trans, + struct bch_inode_info *inode, + u64 start, u64 end, + struct bch_fiemap_extent *cur) +{ + u32 snapshot; + int ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot); + if (ret) + return ret; + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(ei->v.i_ino, start), 0); + SPOS(inode->ei_inum.inum, start, snapshot), 0); - while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { - enum btree_id data_btree = BTREE_ID_extents; + struct bkey_s_c k = + bch2_btree_iter_peek_max(trans, &iter, POS(inode->ei_inum.inum, end)); + ret = bkey_err(k); + if (ret) + goto err; - bch2_trans_begin(trans); + u64 pagecache_end = k.k ? 
max(start, bkey_start_offset(k.k)) : end; - u32 snapshot; - ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); - if (ret) - continue; + ret = bch2_next_fiemap_pagecache_extent(trans, inode, start, pagecache_end, cur); + if (ret) + goto err; - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + struct bpos pagecache_start = bkey_start_pos(&cur->kbuf.k->k); - k = bch2_btree_iter_peek_max(trans, &iter, end); - ret = bkey_err(k); + /* + * Does the pagecache or the btree take precedence? + * + * It _should_ be the pagecache, so that we correctly report delalloc + * extents when dirty in the pagecache (we're COW, after all). + * + * But we'd have to add per-sector writeback tracking to + * bch_folio_state, otherwise we report delalloc extents for clean + * cached data in the pagecache. + * + * We should do this, but even then fiemap won't report stable mappings: + * on bcachefs data moves around in the background (copygc, rebalance) + * and we don't provide a way for userspace to lock that out. 
+ */ + if (k.k && + bkey_le(bpos_max(iter.pos, bkey_start_pos(k.k)), + pagecache_start)) { + bch2_bkey_buf_reassemble(&cur->kbuf, trans->c, k); + bch2_cut_front(iter.pos, cur->kbuf.k); + bch2_cut_back(POS(inode->ei_inum.inum, end), cur->kbuf.k); + cur->flags = 0; + } else if (k.k) { + bch2_cut_back(bkey_start_pos(k.k), cur->kbuf.k); + } + + if (cur->kbuf.k->k.type == KEY_TYPE_reflink_p) { + unsigned sectors = cur->kbuf.k->k.size; + s64 offset_into_extent = 0; + enum btree_id data_btree = BTREE_ID_extents; + ret = bch2_read_indirect_extent(trans, &data_btree, &offset_into_extent, + &cur->kbuf); if (ret) - continue; + goto err; - if (!k.k) - break; + struct bkey_i *k = cur->kbuf.k; + sectors = min_t(unsigned, sectors, k->k.size - offset_into_extent); - if (!bkey_extent_is_data(k.k) && - k.k->type != KEY_TYPE_reservation) { - bch2_btree_iter_advance(trans, &iter); - continue; - } + bch2_cut_front(POS(k->k.p.inode, + bkey_start_offset(&k->k) + offset_into_extent), + k); + bch2_key_resize(&k->k, sectors); + k->k.p = iter.pos; + k->k.p.offset += k->k.size; + } +err: + bch2_trans_iter_exit(trans, &iter); + return ret; +} - s64 offset_into_extent = iter.pos.offset - bkey_start_offset(k.k); - unsigned sectors = k.k->size - offset_into_extent; +static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, + u64 start, u64 len) +{ + struct bch_fs *c = vinode->i_sb->s_fs_info; + struct bch_inode_info *ei = to_bch_ei(vinode); + struct btree_trans *trans; + struct bch_fiemap_extent cur, prev; + int ret = 0; + + ret = fiemap_prep(&ei->v, info, start, &len, 0); + if (ret) + return ret; + + if (start + len < start) + return -EINVAL; + + start >>= 9; + u64 end = (start + len) >> 9; + + bch2_bkey_buf_init(&cur.kbuf); + bch2_bkey_buf_init(&prev.kbuf); + bkey_init(&prev.kbuf.k->k); - bch2_bkey_buf_reassemble(&cur, c, k); + trans = bch2_trans_get(c); - ret = bch2_read_indirect_extent(trans, &data_btree, - &offset_into_extent, &cur); + while (start < end) { + ret = 
lockrestart_do(trans, + bch2_next_fiemap_extent(trans, ei, start, end, &cur)); if (ret) - continue; + goto err; - k = bkey_i_to_s_c(cur.k); - bch2_bkey_buf_realloc(&prev, c, k.k->u64s); + BUG_ON(bkey_start_offset(&cur.kbuf.k->k) < start); + BUG_ON(cur.kbuf.k->k.p.offset > end); - sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent); + if (bkey_start_offset(&cur.kbuf.k->k) == end) + break; - bch2_cut_front(POS(k.k->p.inode, - bkey_start_offset(k.k) + - offset_into_extent), - cur.k); - bch2_key_resize(&cur.k->k, sectors); - cur.k->k.p = iter.pos; - cur.k->k.p.offset += cur.k->k.size; + start = cur.kbuf.k->k.p.offset; - if (have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, - bkey_i_to_s_c(prev.k), 0); + ret = bch2_fill_extent(c, info, &prev); if (ret) - break; + goto err; } - bkey_copy(prev.k, cur.k); - have_extent = true; - - bch2_btree_iter_set_pos(trans, &iter, - POS(iter.pos.inode, iter.pos.offset + sectors)); + bch2_bkey_buf_copy(&prev.kbuf, c, cur.kbuf.k); + prev.flags = cur.flags; } - bch2_trans_iter_exit(trans, &iter); - if (!ret && have_extent) { + if (!bkey_deleted(&prev.kbuf.k->k)) { bch2_trans_unlock(trans); - ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k), - FIEMAP_EXTENT_LAST); + prev.flags |= FIEMAP_EXTENT_LAST; + ret = bch2_fill_extent(c, info, &prev); } - +err: bch2_trans_put(trans); - bch2_bkey_buf_exit(&cur, c); - bch2_bkey_buf_exit(&prev, c); - return ret < 0 ? ret : 0; + bch2_bkey_buf_exit(&cur.kbuf, c); + bch2_bkey_buf_exit(&prev.kbuf, c); + + return bch2_err_class(ret < 0 ? 
ret : 0); } static const struct vm_operations_struct bch_vm_ops = { @@ -1449,6 +1601,141 @@ static int bch2_open(struct inode *vinode, struct file *file) return generic_file_open(vinode, file); } +/* bcachefs inode flags -> FS_IOC_GETFLAGS: */ +static const __maybe_unused unsigned bch_flags_to_uflags[] = { + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, +}; + +/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ +static const __maybe_unused unsigned bch_flags_to_xflags[] = { + [__BCH_INODE_sync] = FS_XFLAG_SYNC, + [__BCH_INODE_immutable] = FS_XFLAG_IMMUTABLE, + [__BCH_INODE_append] = FS_XFLAG_APPEND, + [__BCH_INODE_nodump] = FS_XFLAG_NODUMP, + [__BCH_INODE_noatime] = FS_XFLAG_NOATIME, +}; + +static int bch2_fileattr_get(struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + fileattr_fill_xflags(fa, map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags)); + + if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) + fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; + + if (bch2_inode_casefold(c, &inode->ei_inode)) + fa->flags |= FS_CASEFOLD_FL; + + fa->fsx_projid = inode->ei_qid.q[QTYP_PRJ]; + return 0; +} + +struct flags_set { + unsigned mask; + unsigned flags; + unsigned projid; + bool set_project; + bool set_casefold; + bool casefold; +}; + +static int fssetxattr_inode_update_fn(struct btree_trans *trans, + struct bch_inode_info *inode, + struct bch_inode_unpacked *bi, + void *p) +{ + struct bch_fs *c = trans->c; + struct flags_set *s = p; + + /* + * We're relying on btree locking here for exclusion with other ioctl + * calls - use the flags in the btree (@bi), not inode->i_flags: + */ + if (!S_ISREG(bi->bi_mode) && + !S_ISDIR(bi->bi_mode) && + (s->flags & (BCH_INODE_nodump|BCH_INODE_noatime)) != s->flags) + return 
-EINVAL; + + if (s->casefold != bch2_inode_casefold(c, bi)) { + int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->casefold); + if (ret) + return ret; + } + + if (s->set_project) { + bi->bi_project = s->projid; + bi->bi_fields_set |= BIT(Inode_opt_project); + } + + bi->bi_flags &= ~s->mask; + bi->bi_flags |= s->flags; + + bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); + return 0; +} + +static int bch2_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, + struct fileattr *fa) +{ + struct bch_inode_info *inode = to_bch_ei(d_inode(dentry)); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + struct flags_set s = {}; + int ret; + + if (fa->fsx_valid) { + fa->fsx_xflags &= ~FS_XFLAG_PROJINHERIT; + + s.mask = map_defined(bch_flags_to_xflags); + s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); + if (fa->fsx_xflags) + return -EOPNOTSUPP; + + if (fa->fsx_projid >= U32_MAX) + return -EINVAL; + + /* + * inode fields accessible via the xattr interface are stored with a +1 + * bias, so that 0 means unset: + */ + if ((inode->ei_inode.bi_project || + fa->fsx_projid) && + inode->ei_inode.bi_project != fa->fsx_projid + 1) { + s.projid = fa->fsx_projid + 1; + s.set_project = true; + } + } + + if (fa->flags_valid) { + s.mask = map_defined(bch_flags_to_uflags); + + s.set_casefold = true; + s.casefold = (fa->flags & FS_CASEFOLD_FL) != 0; + fa->flags &= ~FS_CASEFOLD_FL; + + s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); + if (fa->flags) + return -EOPNOTSUPP; + } + + mutex_lock(&inode->ei_update_lock); + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + (s.set_project + ? 
bch2_set_projid(c, inode, fa->fsx_projid) + : 0) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ATTR_CTIME); + mutex_unlock(&inode->ei_update_lock); + return ret; +} + static const struct file_operations bch_file_operations = { .open = bch2_open, .llseek = bch2_llseek, @@ -1476,6 +1763,8 @@ static const struct inode_operations bch_file_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_dir_inode_operations = { @@ -1496,6 +1785,8 @@ static const struct inode_operations bch_dir_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct file_operations bch_dir_file_operations = { @@ -1518,6 +1809,8 @@ static const struct inode_operations bch_symlink_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct inode_operations bch_special_inode_operations = { @@ -1528,6 +1821,8 @@ static const struct inode_operations bch_special_inode_operations = { .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif + .fileattr_get = bch2_fileattr_get, + .fileattr_set = bch2_fileattr_set, }; static const struct address_space_operations bch_address_space_operations = { @@ -1886,7 +2181,13 @@ static void bch2_evict_inode(struct inode *vinode) KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, KEY_TYPE_QUOTA_WARN); - bch2_inode_rm(c, inode_inum(inode)); + int ret = bch2_inode_rm(c, inode_inum(inode)); + if (ret && !bch2_err_matches(ret, EROFS)) { + bch_err_msg(c, ret, "VFS incorrectly tried to delete inode %llu:%llu", + inode->ei_inum.subvol, + inode->ei_inum.inum); + bch2_sb_error_count(c, BCH_FSCK_ERR_vfs_bad_inode_rm); + } /* * If we are deleting, we 
need it present in the vfs hash table @@ -2185,10 +2486,9 @@ static int bch2_fs_get_tree(struct fs_context *fc) bch2_opts_apply(&c->opts, opts); - /* - * need to initialise sb and set c->vfs_sb _before_ starting fs, - * for blk_holder_ops - */ + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); @@ -2250,9 +2550,10 @@ static int bch2_fs_get_tree(struct fs_context *fc) sb->s_shrink->seeks = 0; - ret = bch2_fs_start(c); - if (ret) - goto err_put_super; +#ifdef CONFIG_UNICODE + sb->s_encoding = c->cf_encoding; +#endif + generic_set_sb_d_ops(sb); vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM); ret = PTR_ERR_OR_ZERO(vinode); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 18308f3d64a1cc..bf117f2225d8a0 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -306,6 +306,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, &lostfound_str, lostfound->bi_inum, &lostfound->bi_dir_offset, + BTREE_UPDATE_internal_snapshot_node| STR_HASH_must_create) ?: bch2_inode_write_flags(trans, &lostfound_iter, lostfound, BTREE_UPDATE_internal_snapshot_node); @@ -321,6 +322,31 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) inode->bi_subvol == BCACHEFS_ROOT_SUBVOL) return false; + /* + * Subvolume roots are special: older versions of subvolume roots may be + * disconnected, it's only the newest version that matters. + * + * We only keep a single dirent pointing to a subvolume root, i.e. + * older versions of snapshots will not have a different dirent pointing + * to the same subvolume root. + * + * This is because dirents that point to subvolumes are only visible in + * the parent subvolume - versioning is not needed - and keeping them + * around would break fsck, because when we're crossing subvolumes we + * don't have a consistent snapshot ID to check the inode <-> dirent + * relationships.
+ * + * Thus, a subvolume root that's been renamed after a snapshot will have + * a disconnected older version - that's expected. + * + * Note that taking a snapshot always updates the root inode (to update + * the dirent backpointer), so a subvolume root inode with + * BCH_INODE_has_child_snapshot is never visible. + */ + if (inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) + return false; + return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); } @@ -406,6 +432,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * &name, inode->bi_subvol ?: inode->bi_inum, &inode->bi_dir_offset, + BTREE_UPDATE_internal_snapshot_node| STR_HASH_must_create); if (ret) { bch_err_msg(c, ret, "error creating dirent"); @@ -1007,6 +1034,23 @@ static int check_inode_dirent_inode(struct btree_trans *trans, if (ret && !bch2_err_matches(ret, ENOENT)) return ret; + if ((ret || dirent_points_to_inode_nowarn(d, inode)) && + inode->bi_subvol && + (inode->bi_flags & BCH_INODE_has_child_snapshot)) { + /* Older version of a renamed subvolume root: we won't have a + * correct dirent for it. That's expected, see + * inode_should_reattach(). + * + * We don't clear the backpointer field when doing the rename + * because there might be arbitrarily many versions in older + * snapshots. 
+ */ + inode->bi_dir = 0; + inode->bi_dir_offset = 0; + *write_inode = true; + goto out; + } + if (fsck_err_on(ret, trans, inode_points_to_missing_dirent, "inode points to missing dirent\n%s", @@ -1027,7 +1071,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, inode->bi_dir_offset = 0; *write_inode = true; } - +out: ret = 0; fsck_err: bch2_trans_iter_exit(trans, &dirent_iter); @@ -1139,6 +1183,14 @@ static int check_inode(struct btree_trans *trans, ret = 0; } + if (fsck_err_on(S_ISDIR(u.bi_mode) && u.bi_size, + trans, inode_dir_has_nonzero_i_size, + "directory %llu:%u with nonzero i_size %lli", + u.bi_inum, u.bi_snapshot, u.bi_size)) { + u.bi_size = 0; + do_update = true; + } + ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) goto err; @@ -2146,6 +2198,41 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + /* check casefold */ + if (fsck_err_on(d.v->d_casefold != !!hash_info->cf_encoding, + trans, dirent_casefold_mismatch, + "dirent casefold does not match dir casefold\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + buf.buf))) { + struct qstr name = bch2_dirent_get_name(d); + u32 subvol = d.v->d_type == DT_SUBVOL + ? d.v->d_parent_subvol + : 0; + u64 target = d.v->d_type == DT_SUBVOL + ? 
d.v->d_child_subvol + : d.v->d_inum; + u64 dir_offset; + + ret = bch2_hash_delete_at(trans, + bch2_dirent_hash_desc, hash_info, iter, + BTREE_UPDATE_internal_snapshot_node) ?: + bch2_dirent_create_snapshot(trans, subvol, + d.k->p.inode, d.k->p.snapshot, + hash_info, + d.v->d_type, + &name, + target, + &dir_offset, + BTREE_ITER_with_updates| + BTREE_UPDATE_internal_snapshot_node| + STR_HASH_must_create) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); + + /* might need another check_dirents pass */ + goto out; + } + if (d.v->d_type == DT_SUBVOL) { ret = check_dirent_to_subvol(trans, iter, d); if (ret) @@ -2404,7 +2491,7 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, u32 parent = le32_to_cpu(s.v->fs_path_parent); if (darray_u32_has(&subvol_path, parent)) { - if (fsck_err(c, subvol_loop, "subvolume loop")) + if (fsck_err(trans, subvol_loop, "subvolume loop")) ret = reattach_subvol(trans, s); break; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index b51d98cf8a80d5..845efd429d13a8 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -14,6 +14,7 @@ #include "extent_update.h" #include "fs.h" #include "inode.h" +#include "namei.h" #include "opts.h" #include "str_hash.h" #include "snapshot.h" @@ -37,6 +38,7 @@ static const char * const bch2_inode_flag_strs[] = { #undef x static int delete_ancestor_snapshot_inodes(struct btree_trans *, struct bpos); +static int may_delete_deleted_inum(struct btree_trans *, subvol_inum); static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 }; @@ -1047,19 +1049,23 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) u32 snapshot; int ret; + ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum)); + if (ret) + goto err2; + /* * If this was a directory, there shouldn't be any real dirents left - * but there could be whiteouts (from hash collisions) that we should * delete: * - * XXX: the dirent could ideally would delete whiteouts when they're no 
+ * XXX: the dirent code ideally would delete whiteouts when they're no * longer needed */ ret = bch2_inode_delete_keys(trans, inum, BTREE_ID_extents) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents); if (ret) - goto err; + goto err2; retry: bch2_trans_begin(trans); @@ -1204,6 +1210,41 @@ int bch2_inum_opts_get(struct btree_trans *trans, subvol_inum inum, struct bch_i return 0; } +int bch2_inode_set_casefold(struct btree_trans *trans, subvol_inum inum, + struct bch_inode_unpacked *bi, unsigned v) +{ + struct bch_fs *c = trans->c; + +#ifdef CONFIG_UNICODE + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + ret = bch2_empty_dir_trans(trans, inum); + if (ret < 0) + return ret; + + ret = bch2_request_incompat_feature(c, bcachefs_metadata_version_casefolding); + if (ret) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); + + bi->bi_casefold = v + 1; + bi->bi_fields_set |= BIT(Inode_opt_casefold); + + return 0; +#else + bch_err(c, "Cannot use casefolding on a kernel without CONFIG_UNICODE"); + return -EOPNOTSUPP; +#endif +} + static noinline int __bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) { struct bch_fs *c = trans->c; @@ -1306,10 +1347,8 @@ int bch2_inode_rm_snapshot(struct btree_trans *trans, u64 inum, u32 snapshot) delete_ancestor_snapshot_inodes(trans, SPOS(0, inum, snapshot)); } -static int may_delete_deleted_inode(struct btree_trans *trans, - struct btree_iter *iter, - struct bpos pos, - bool *need_another_pass) +static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, + bool from_deleted_inodes) { struct bch_fs *c = trans->c; struct btree_iter inode_iter; @@ -1324,11 +1363,13 @@ static int may_delete_deleted_inode(struct btree_trans *trans, return ret; 
ret = bkey_is_inode(k.k) ? 0 : -BCH_ERR_ENOENT_inode; - if (fsck_err_on(!bkey_is_inode(k.k), + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_missing, "nonexistent inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_unpack(k, &inode); if (ret) @@ -1336,7 +1377,8 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (S_ISDIR(inode.bi_mode)) { ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot); - if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY), + if (fsck_err_on(from_deleted_inodes && + bch2_err_matches(ret, ENOTEMPTY), trans, deleted_inode_is_dir, "non empty directory %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) @@ -1345,17 +1387,25 @@ static int may_delete_deleted_inode(struct btree_trans *trans, goto out; } - if (fsck_err_on(!(inode.bi_flags & BCH_INODE_unlinked), + ret = inode.bi_flags & BCH_INODE_unlinked ? 0 : -BCH_ERR_inode_not_unlinked; + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_not_unlinked, "non-deleted inode %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; - if (fsck_err_on(inode.bi_flags & BCH_INODE_has_child_snapshot, + ret = !(inode.bi_flags & BCH_INODE_has_child_snapshot) + ? 
0 : -BCH_ERR_inode_has_child_snapshot; + + if (fsck_err_on(from_deleted_inodes && ret, trans, deleted_inode_has_child_snapshots, "inode with child snapshots %llu:%u in deleted_inodes btree", pos.offset, pos.snapshot)) goto delete; + if (ret) + goto out; ret = bch2_inode_has_child_snapshots(trans, k.k->p); if (ret < 0) @@ -1372,19 +1422,28 @@ static int may_delete_deleted_inode(struct btree_trans *trans, if (ret) goto out; } + + if (!from_deleted_inodes) { + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_inode_has_child_snapshot; + goto out; + } + goto delete; } - if (test_bit(BCH_FS_clean_recovery, &c->flags) && - !fsck_err(trans, deleted_inode_but_clean, - "filesystem marked as clean but have deleted inode %llu:%u", - pos.offset, pos.snapshot)) { - ret = 0; - goto out; - } + if (from_deleted_inodes) { + if (test_bit(BCH_FS_clean_recovery, &c->flags) && + !fsck_err(trans, deleted_inode_but_clean, + "filesystem marked as clean but have deleted inode %llu:%u", + pos.offset, pos.snapshot)) { + ret = 0; + goto out; + } - ret = 1; + ret = 1; + } out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); @@ -1395,12 +1454,19 @@ static int may_delete_deleted_inode(struct btree_trans *trans, goto out; } +static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum) +{ + u32 snapshot; + + return bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?: + may_delete_deleted_inode(trans, SPOS(0, inum.inum, snapshot), false); +} + int bch2_delete_dead_inodes(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); - bool need_another_pass; int ret; -again: + /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write buffer will be out of sync; therefore we @@ -1410,8 +1476,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) if (ret) goto err; - need_another_pass = false; - /* * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() 
will return a nested transaction restart, @@ -1421,7 +1485,7 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); + ret = may_delete_deleted_inode(trans, k.k->p, true); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); @@ -1442,9 +1506,6 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret; })); - - if (!ret && need_another_pass) - goto again; err: bch2_trans_put(trans); return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f82cfbf460d094..5cfba9e98966d5 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -243,6 +243,14 @@ static inline unsigned bkey_inode_mode(struct bkey_s_c k) } } +static inline bool bch2_inode_casefold(struct bch_fs *c, const struct bch_inode_unpacked *bi) +{ + /* inode opts are stored with a +1 bias: 0 means "unset, use fs opt" */ + return bi->bi_casefold + ?
bi->bi_casefold - 1 + : c->opts.casefold; +} + /* i_nlink: */ static inline unsigned nlink_bias(umode_t mode) @@ -284,7 +292,9 @@ static inline bool bch2_inode_should_have_single_bp(struct bch_inode_unpacked *i struct bch_opts bch2_inode_opts_to_opts(struct bch_inode_unpacked *); void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); -int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +int bch2_inum_opts_get(struct btree_trans *, subvol_inum, struct bch_io_opts *); +int bch2_inode_set_casefold(struct btree_trans *, subvol_inum, + struct bch_inode_unpacked *, unsigned); #include "rebalance.h" diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h index 117110af1e3f2a..87e193e8ed2543 100644 --- a/fs/bcachefs/inode_format.h +++ b/fs/bcachefs/inode_format.h @@ -103,7 +103,8 @@ struct bch_inode_generation { x(bi_parent_subvol, 32) \ x(bi_nocow, 8) \ x(bi_depth, 32) \ - x(bi_inodes_32bit, 8) + x(bi_inodes_32bit, 8) \ + x(bi_casefold, 8) /* subset of BCH_INODE_FIELDS */ #define BCH_INODE_OPTS() \ @@ -117,7 +118,8 @@ struct bch_inode_generation { x(background_target, 16) \ x(erasure_code, 16) \ x(nocow, 8) \ - x(inodes_32bit, 8) + x(inodes_32bit, 8) \ + x(casefold, 8) enum inode_opt_id { #define x(name, ...) 
\ @@ -137,8 +139,7 @@ enum inode_opt_id { x(i_sectors_dirty, 6) \ x(unlinked, 7) \ x(backptr_untrusted, 8) \ - x(has_child_snapshot, 9) \ - x(casefolded, 10) + x(has_child_snapshot, 9) /* bits 20+ reserved for packed fields below: */ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 417bb0c7bbfa65..def4a26a3b4590 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -487,6 +487,8 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; + int orig_error = rbio->ret; + struct btree_trans *trans = bch2_trans_get(c); trace_io_read_retry(&rbio->bio); @@ -519,7 +521,9 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else { + } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && + orig_error != -BCH_ERR_data_read_ptr_stale_race && + !failed.nr) { struct printbuf buf = PRINTBUF; lockrestart_do(trans, @@ -977,7 +981,8 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto err; } - if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { + if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && + !c->chacha20_key_set) { struct printbuf buf = PRINTBUF; bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); @@ -1344,14 +1349,16 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, bch2_trans_iter_exit(trans, &iter); - if (ret) { - struct printbuf buf = PRINTBUF; - lockrestart_do(trans, - bch2_inum_offset_err_msg_trans(trans, &buf, inum, - bvec_iter.bi_sector << 9)); - prt_printf(&buf, "read error: %s", bch2_err_str(ret)); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); + if (unlikely(ret)) { + if (ret != -BCH_ERR_extent_poisoned) { + struct printbuf buf = PRINTBUF; + lockrestart_do(trans, + 
bch2_inum_offset_err_msg_trans(trans, &buf, inum, + bvec_iter.bi_sector << 9)); + prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } rbio->bio.bi_status = BLK_STS_IOERR; rbio->ret = ret; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index a418fa62f09d23..c1237da079ede2 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -255,6 +255,27 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, } if (i_sectors_delta) { + s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); + if (unlikely(bi_sectors + i_sectors_delta < 0)) { + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", + extent_iter->pos.inode, bi_sectors, i_sectors_delta); + + bool repeat = false, print = false, suppress = false; + bch2_count_fsck_err(c, inode_i_sectors_underflow, buf.buf, + &repeat, &print, &suppress); + if (print) + bch2_print_str(c, buf.buf); + printbuf_exit(&buf); + + if (i_sectors_delta < 0) + i_sectors_delta = -bi_sectors; + else + i_sectors_delta = 0; + } + le64_add_cpu(&inode->v.bi_sectors, i_sectors_delta); inode_update_flags = 0; } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index d8f74b6d0a75a8..bb45d363419488 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -281,7 +281,24 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; - BUG_ON(sectors > buf->sectors); + if (unlikely(sectors > buf->sectors)) { + struct printbuf err = PRINTBUF; + err.atomic++; + + prt_printf(&err, "journal entry overran reserved space: %u > %u\n", + sectors, buf->sectors); + prt_printf(&err, "buf u64s %u u64s reserved %u cur_entry_u64s %u block_bits %u\n", + le32_to_cpu(buf->data->u64s), buf->u64s_reserved, + 
j->cur_entry_u64s, + c->block_bits); + prt_printf(&err, "fatal error - emergency read only"); + bch2_journal_halt_locked(j); + + bch_err(c, "%s", err.buf); + printbuf_exit(&err); + return; + } + buf->sectors = sectors; /* @@ -1462,8 +1479,6 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); - - set_bit(JOURNAL_running, &j->flags); j->last_flush_write = jiffies; j->reservations.idx = journal_cur_seq(j); @@ -1474,6 +1489,21 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) return 0; } +void bch2_journal_set_replay_done(struct journal *j) +{ + /* + * journal_space_available must happen before setting JOURNAL_running + * JOURNAL_running must happen before JOURNAL_replay_done + */ + spin_lock(&j->lock); + bch2_journal_space_available(j); + + set_bit(JOURNAL_need_flush_write, &j->flags); + set_bit(JOURNAL_running, &j->flags); + set_bit(JOURNAL_replay_done, &j->flags); + spin_unlock(&j->lock); +} + /* init/exit: */ void bch2_dev_journal_exit(struct bch_dev *ca) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 47828771f9c239..641e20c05a147d 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -437,12 +437,6 @@ static inline int bch2_journal_error(struct journal *j) struct bch_dev; -static inline void bch2_journal_set_replay_done(struct journal *j) -{ - BUG_ON(!test_bit(JOURNAL_running, &j->flags)); - set_bit(JOURNAL_replay_done, &j->flags); -} - void bch2_journal_unblock(struct journal *); void bch2_journal_block(struct journal *); struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u64, bool *); @@ -459,6 +453,7 @@ void bch2_dev_journal_stop(struct journal *, struct bch_dev *); void bch2_fs_journal_stop(struct journal *); int bch2_fs_journal_start(struct journal *, u64); +void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); int bch2_dev_journal_init(struct bch_dev *, struct 
bch_sb *); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 1b7961f4f609e3..ded18a94ed021e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -19,6 +19,7 @@ #include #include +#include void bch2_journal_pos_from_member_info_set(struct bch_fs *c) { @@ -1262,7 +1263,8 @@ int bch2_journal_read(struct bch_fs *c, degraded = true; } - closure_sync(&jlist.cl); + while (closure_sync_timeout(&jlist.cl, sysctl_hung_task_timeout_secs * HZ / 2)) + ; if (jlist.ret) return jlist.ret; @@ -1460,7 +1462,7 @@ int bch2_journal_read(struct bch_fs *c, static void journal_advance_devs_to_next_bucket(struct journal *j, struct dev_alloc_list *devs, - unsigned sectors, u64 seq) + unsigned sectors, __le64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -1782,7 +1784,7 @@ static CLOSURE_CALLBACK(journal_write_submit) struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); if (!ca) { /* XXX: fix this */ - bch_err(c, "missing device for journal write\n"); + bch_err(c, "missing device %u for journal write", ptr->dev); continue; } diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 5d1547aa118ac8..cc00b0fc40d8e3 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -17,6 +17,8 @@ #include #include +static bool __should_discard_bucket(struct journal *, struct journal_device *); + /* Free space calculations: */ static unsigned journal_space_from(struct journal_device *ja, @@ -203,8 +205,7 @@ void bch2_journal_space_available(struct journal *j) ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk) ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr; - if (ja->discard_idx != ja->dirty_idx_ondisk) - can_discard = true; + can_discard |= __should_discard_bucket(j, ja); max_entry_size = min_t(unsigned, max_entry_size, ca->mi.bucket_size); nr_online++; @@ -252,7 +253,10 @@ void bch2_journal_space_available(struct journal *j) bch2_journal_set_watermark(j); out: - 
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0; + j->cur_entry_sectors = !ret + ? round_down(j->space[journal_space_discarded].next_entry, + block_sectors(c)) + : 0; j->cur_entry_error = ret; if (!ret) @@ -261,12 +265,19 @@ void bch2_journal_space_available(struct journal *j) /* Discards - last part of journal reclaim: */ -static bool should_discard_bucket(struct journal *j, struct journal_device *ja) +static bool __should_discard_bucket(struct journal *j, struct journal_device *ja) { - bool ret; + unsigned min_free = max(4, ja->nr / 8); + + return bch2_journal_dev_buckets_available(j, ja, journal_space_discarded) < + min_free && + ja->discard_idx != ja->dirty_idx_ondisk; +} +static bool should_discard_bucket(struct journal *j, struct journal_device *ja) +{ spin_lock(&j->lock); - ret = ja->discard_idx != ja->dirty_idx_ondisk; + bool ret = __should_discard_bucket(j, ja); spin_unlock(&j->lock); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fc396b9fa75451..dfdbb9259985b6 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -784,7 +784,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, goto err; ret = bch2_btree_write_buffer_tryflush(trans); - bch_err_msg(c, ret, "flushing btree write buffer"); + if (!bch2_err_matches(ret, EROFS)) + bch_err_msg(c, ret, "flushing btree write buffer"); if (ret) goto err; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 159410c50861b7..96873372b51600 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. 
+ */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_move_stats_init(&move_stats, "copygc"); bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats, writepoint_ptr(&c->copygc_write_point), diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h index ea181fef5bc932..d1885cf67a4574 100644 --- a/fs/bcachefs/movinggc.h +++ b/fs/bcachefs/movinggc.h @@ -5,6 +5,15 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); +static inline void bch2_copygc_wakeup(struct bch_fs *c) +{ + rcu_read_lock(); + struct task_struct *p = rcu_dereference(c->copygc_thread); + if (p) + wake_up_process(p); + rcu_read_unlock(); +} + void bch2_copygc_stop(struct bch_fs *); int bch2_copygc_start(struct bch_fs *); void bch2_fs_copygc_init(struct bch_fs *); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 0d65ea96f7a297..413fb60cff434b 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -47,10 +47,6 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; - /* Inherit casefold state from parent. 
*/ - if (S_ISDIR(mode)) - new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded; - if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); @@ -162,7 +158,6 @@ int bch2_create_trans(struct btree_trans *trans, name, dir_target, &dir_offset, - &dir_u->bi_size, STR_HASH_must_create|BTREE_ITER_with_updates) ?: bch2_inode_write(trans, &dir_iter, dir_u); if (ret) @@ -229,7 +224,6 @@ int bch2_link_trans(struct btree_trans *trans, mode_to_type(inode_u->bi_mode), name, inum.inum, &dir_offset, - &dir_u->bi_size, STR_HASH_must_create); if (ret) goto err; @@ -347,6 +341,9 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u, bool ret = false; for (id = 0; id < Inode_opt_nr; id++) { + if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold) + continue; + /* Skip attributes that were explicitly set on this inode */ if (dst_u->bi_fields_set & (1 << id)) continue; @@ -421,8 +418,8 @@ int bch2_rename_trans(struct btree_trans *trans, } ret = bch2_dirent_rename(trans, - src_dir, &src_hash, &src_dir_u->bi_size, - dst_dir, &dst_hash, &dst_dir_u->bi_size, + src_dir, &src_hash, + dst_dir, &dst_hash, src_name, &src_inum, &src_offset, dst_name, &dst_inum, &dst_offset, mode); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 4d06313076ff65..dfb14810124c7c 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -228,6 +228,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_ERASURE_CODE, false, \ NULL, "Enable erasure coding (DO NOT USE YET)") \ + x(casefold, u8, \ + OPT_FS|OPT_INODE|OPT_FORMAT, \ + OPT_BOOL(), \ + BCH_SB_CASEFOLD, false, \ + NULL, "Dirent lookups are casefolded") \ x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index c63fa53f30d219..623273556aa978 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -262,7 +262,7 @@ int 
bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -309,7 +309,7 @@ static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { - if (!bch2_bkey_rebalance_opts(k)) + if (k.k->type == KEY_TYPE_reflink_v || !bch2_bkey_rebalance_opts(k)) return 0; struct bkey_i *n = bch2_bkey_make_mut(trans, iter, &k, 0); @@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg) set_freezable(); + /* + * Data move operations can't run until after check_snapshots has + * completed, and bch2_snapshot_is_ancestor() is available. + */ + kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots || + kthread_should_stop()); + bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats, writepoint_ptr(&c->rebalance_write_point), true); @@ -664,7 +671,7 @@ void bch2_rebalance_stop(struct bch_fs *c) c->rebalance.thread = NULL; if (p) { - /* for sychronizing with rebalance_wakeup() */ + /* for synchronizing with bch2_rebalance_wakeup() */ synchronize_rcu(); kthread_stop(p); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 62a3859d3823f3..e5e8eb4a2dd150 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -37,7 +37,7 @@ int bch2_set_rebalance_needs_scan_trans(struct btree_trans *, u64); int bch2_set_rebalance_needs_scan(struct bch_fs *, u64 inum); int bch2_set_fs_needs_rebalance(struct bch_fs *); -static inline void rebalance_wakeup(struct bch_fs *c) +static inline void bch2_rebalance_wakeup(struct bch_fs *c) { struct task_struct *p; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 79fd18a5a07c43..d6c4ef819d40ac 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -18,6 +18,7 @@ #include "journal_seq_blacklist.h" #include "logged_ops.h" #include "move.h"
+#include "movinggc.h" #include "namei.h" #include "quota.h" #include "rebalance.h" @@ -389,9 +390,9 @@ int bch2_journal_replay(struct bch_fs *c) * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: */ - sort(keys_sorted.data, keys_sorted.nr, - sizeof(keys_sorted.data[0]), - journal_sort_seq_cmp, NULL); + sort_nonatomic(keys_sorted.data, keys_sorted.nr, + sizeof(keys_sorted.data[0]), + journal_sort_seq_cmp, NULL); darray_for_each(keys_sorted, kp) { cond_resched(); @@ -1125,14 +1126,17 @@ int bch2_fs_initialize(struct bch_fs *c) * journal_res_get() will crash if called before this has * set up the journal.pin FIFO and journal.cur pointer: */ - bch2_fs_journal_start(&c->journal, 1); - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); + ret = bch2_fs_journal_start(&c->journal, 1); + if (ret) + goto err; ret = bch2_fs_read_write_early(c); if (ret) goto err; + set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_journal_set_replay_done(&c->journal); + for_each_member_device(c, ca) { ret = bch2_dev_usage_init(ca, false); if (ret) { @@ -1191,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c) c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1; + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + if (enabled_qtypes(c)) { ret = bch2_fs_quota_read(c); if (ret) diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 593ff142530dfb..22f72bb5b8536b 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -12,6 +12,7 @@ #include "journal.h" #include "lru.h" #include "logged_ops.h" +#include "movinggc.h" #include "rebalance.h" #include "recovery.h" #include "recovery_passes.h" @@ -262,49 +263,52 @@ int bch2_run_recovery_passes(struct bch_fs *c) */ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; - while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { - 
c->next_recovery_pass = c->curr_recovery_pass + 1; + spin_lock_irq(&c->recovery_pass_lock); - spin_lock_irq(&c->recovery_pass_lock); + while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) { + unsigned prev_done = c->recovery_pass_done; unsigned pass = c->curr_recovery_pass; + c->next_recovery_pass = pass + 1; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) { - spin_unlock_irq(&c->recovery_pass_lock); + c->curr_recovery_pass > c->opts.recovery_pass_last) break; - } - if (!should_run_recovery_pass(c, pass)) { - c->curr_recovery_pass++; - c->recovery_pass_done = max(c->recovery_pass_done, pass); + if (should_run_recovery_pass(c, pass)) { spin_unlock_irq(&c->recovery_pass_lock); - continue; - } - spin_unlock_irq(&c->recovery_pass_lock); - - ret = bch2_run_recovery_pass(c, pass) ?: - bch2_journal_flush(&c->journal); - - if (!ret && !test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, pass); - - spin_lock_irq(&c->recovery_pass_lock); - if (c->next_recovery_pass < c->curr_recovery_pass) { - /* - * bch2_run_explicit_recovery_pass() was called: we - * can't always catch -BCH_ERR_restart_recovery because - * it may have been called from another thread (btree - * node read completion) - */ - ret = 0; - c->recovery_passes_complete &= ~(~0ULL << c->curr_recovery_pass); - } else { - c->recovery_passes_complete |= BIT_ULL(pass); - c->recovery_pass_done = max(c->recovery_pass_done, pass); + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + if (!ret && !test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); + spin_lock_irq(&c->recovery_pass_lock); + + if (c->next_recovery_pass < c->curr_recovery_pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + ret = 0; + c->recovery_passes_complete &= 
~(~0ULL << c->curr_recovery_pass); + } else { + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); + } } + c->curr_recovery_pass = c->next_recovery_pass; - spin_unlock_irq(&c->recovery_pass_lock); + + if (prev_done <= BCH_RECOVERY_PASS_check_snapshots && + c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) { + bch2_copygc_wakeup(c); + bch2_rebalance_wakeup(c); + } } + spin_unlock_irq(&c->recovery_pass_lock); + return ret; } diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index acb5d845841e53..badd0e17ada5a0 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -20,6 +20,10 @@ * x(version, recovery_passes, errors...) */ #define UPGRADE_TABLE() \ + x(snapshot_2, \ + RECOVERY_PASS_ALL_FSCK, \ + BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ + BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ x(backpointers, \ RECOVERY_PASS_ALL_FSCK) \ x(inode_v3, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 5d43e3504386ad..9387f6092fe989 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -46,7 +46,7 @@ enum bch_fsck_flags { x(btree_node_unsupported_version, 34, 0) \ x(btree_node_bset_older_than_sb_min, 35, 0) \ x(btree_node_bset_newer_than_sb, 36, 0) \ - x(btree_node_data_missing, 37, 0) \ + x(btree_node_data_missing, 37, FSCK_AUTOFIX) \ x(btree_node_bset_after_end, 38, 0) \ x(btree_node_replicas_sectors_written_mismatch, 39, 0) \ x(btree_node_replicas_data_mismatch, 40, 0) \ @@ -205,10 +205,11 @@ enum bch_fsck_flags { x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ - x(subvol_not_master_and_not_snapshot, 187, 0) \ + x(subvol_not_master_and_not_snapshot, 187, FSCK_AUTOFIX) \ x(subvol_to_missing_root, 188, 0) \ - x(subvol_root_wrong_bi_subvol, 189, 0) \ + x(subvol_root_wrong_bi_subvol, 189, FSCK_AUTOFIX) \ x(bkey_in_missing_snapshot, 190, 0) \ + 
x(bkey_in_deleted_snapshot, 315, 0) \ x(inode_pos_inode_nonzero, 191, 0) \ x(inode_pos_blockdev_range, 192, 0) \ x(inode_alloc_cursor_inode_bad, 301, 0) \ @@ -216,6 +217,7 @@ enum bch_fsck_flags { x(inode_str_hash_invalid, 194, 0) \ x(inode_v3_fields_start_bad, 195, 0) \ x(inode_snapshot_mismatch, 196, 0) \ + x(snapshot_key_missing_inode_snapshot, 314, 0) \ x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ @@ -230,12 +232,19 @@ enum bch_fsck_flags { x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ + x(inode_dir_has_nonzero_i_size, 319, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ x(inode_has_child_snapshots_wrong, 287, 0) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ + x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ + x(inode_has_case_insensitive_not_set, 316, FSCK_AUTOFIX) \ + x(inode_parent_has_case_insensitive_not_set, 317, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_underflow, 311, FSCK_AUTOFIX) \ + x(vfs_inode_i_blocks_not_zero_at_truncate, 313, FSCK_AUTOFIX) \ + x(vfs_bad_inode_rm, 320, 0) \ x(deleted_inode_but_clean, 211, FSCK_AUTOFIX) \ x(deleted_inode_missing, 212, FSCK_AUTOFIX) \ x(deleted_inode_is_dir, 213, FSCK_AUTOFIX) \ @@ -259,6 +268,7 @@ enum bch_fsck_flags { x(dirent_to_overwritten_inode, 302, 0) \ x(dirent_to_missing_subvol, 230, 0) \ x(dirent_to_itself, 231, 0) \ + x(dirent_casefold_mismatch, 318, FSCK_AUTOFIX) \ x(quota_type_invalid, 232, 0) \ x(xattr_val_size_too_small, 233, 0) \ x(xattr_val_size_too_big, 234, 0) \ @@ -290,14 +300,15 @@ enum bch_fsck_flags { x(btree_node_bkey_bad_u64s, 260, 0) \ x(btree_node_topology_empty_interior_node, 261, 0) \ x(btree_ptr_v2_min_key_bad, 262, 0) \ - 
x(btree_root_unreadable_and_scan_found_nothing, 263, 0) \ - x(snapshot_node_missing, 264, 0) \ + x(btree_root_unreadable_and_scan_found_nothing, 263, FSCK_AUTOFIX) \ + x(snapshot_node_missing, 264, FSCK_AUTOFIX) \ x(dup_backpointer_to_bad_csum_extent, 265, 0) \ x(btree_bitmap_not_marked, 266, FSCK_AUTOFIX) \ x(sb_clean_entry_overrun, 267, 0) \ x(btree_ptr_v2_written_0, 268, 0) \ x(subvol_snapshot_bad, 269, 0) \ x(subvol_inode_bad, 270, 0) \ + x(subvol_missing, 308, FSCK_AUTOFIX) \ x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \ x(accounting_mismatch, 272, FSCK_AUTOFIX) \ x(accounting_replicas_not_marked, 273, 0) \ @@ -317,7 +328,9 @@ enum bch_fsck_flags { x(directory_size_mismatch, 303, FSCK_AUTOFIX) \ x(dirent_cf_name_too_big, 304, 0) \ x(dirent_stray_data_after_cf_name, 305, 0) \ - x(MAX, 308, 0) + x(rebalance_work_incorrectly_set, 309, FSCK_AUTOFIX) \ + x(rebalance_work_incorrectly_unset, 310, FSCK_AUTOFIX) \ + x(MAX, 321, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 116131f95815f3..72779912939b6f 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -15,9 +15,11 @@ void bch2_dev_missing(struct bch_fs *c, unsigned dev) bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); } -void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) +void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket) { - bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); + bch2_fs_inconsistent(ca->fs, + "pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)", + bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets); } #define x(t, n, ...) 
[n] = #t, diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 06bb41a3f3605b..42786657522ce1 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -249,20 +249,23 @@ static inline struct bch_dev *bch2_dev_tryget(struct bch_fs *c, unsigned dev) static inline struct bch_dev *bch2_dev_bucket_tryget_noerror(struct bch_fs *c, struct bpos bucket) { struct bch_dev *ca = bch2_dev_tryget_noerror(c, bucket.inode); - if (ca && !bucket_valid(ca, bucket.offset)) { + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { bch2_dev_put(ca); ca = NULL; } return ca; } -void bch2_dev_bucket_missing(struct bch_fs *, struct bpos); +void bch2_dev_bucket_missing(struct bch_dev *, u64); static inline struct bch_dev *bch2_dev_bucket_tryget(struct bch_fs *c, struct bpos bucket) { - struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, bucket); - if (!ca) - bch2_dev_bucket_missing(c, bucket); + struct bch_dev *ca = bch2_dev_tryget(c, bucket.inode); + if (ca && unlikely(!bucket_valid(ca, bucket.offset))) { + bch2_dev_bucket_missing(ca, bucket.offset); + bch2_dev_put(ca); + ca = NULL; + } return ca; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index b7de29aed83917..fec569c7deb1c7 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -396,7 +396,7 @@ u32 bch2_snapshot_tree_oldest_subvol(struct bch_fs *c, u32 snapshot_root) u32 subvol = 0, s; rcu_read_lock(); - while (id) { + while (id && bch2_snapshot_exists(c, id)) { s = snapshot_t(c, id)->subvol; if (s && (!subvol || s < subvol)) diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 09a354a26c3bd0..0c1a00539bd14a 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -33,7 +33,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; - struct unicode_map *cf_encoding; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as 
the key. @@ -44,11 +44,10 @@ struct bch_hash_info { static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { - /* XXX ick */ struct bch_hash_info info = { .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, #endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 5537283d0beaeb..bc6009a7128443 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -6,6 +6,8 @@ #include "errcode.h" #include "error.h" #include "fs.h" +#include "inode.h" +#include "recovery_passes.h" #include "snapshot.h" #include "subvolume.h" @@ -44,8 +46,8 @@ static int check_subvol(struct btree_trans *trans, ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) - bch_err(c, "subvolume %llu points to nonexistent snapshot %u", - k.k->p.offset, snapid); + return bch2_run_explicit_recovery_pass(c, + BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; if (ret) return ret; @@ -112,10 +114,20 @@ static int check_subvol(struct btree_trans *trans, "subvolume %llu points to missing subvolume root %llu:%u", k.k->p.offset, le64_to_cpu(subvol.v->inode), le32_to_cpu(subvol.v->snapshot))) { - ret = bch2_subvolume_delete(trans, iter->pos.offset); - bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); - ret = ret ?: -BCH_ERR_transaction_restart_nested; - goto err; + /* + * Recreate - any contents that are still disconnected + * will then get reattached under lost+found + */ + bch2_inode_init_early(c, &inode); + bch2_inode_init_late(&inode, bch2_current_time(c), + 0, 0, S_IFDIR|0700, 0, NULL); + inode.bi_inum = le64_to_cpu(subvol.v->inode); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_subvol = k.k->p.offset; + inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent); 
+ ret = __bch2_fsck_write_inode(trans, &inode); + if (ret) + goto err; } } else { goto err; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index e27422b6d9c6a9..cb5d960aed92a6 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -73,14 +73,30 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v ? 0 : -BCH_ERR_may_not_use_incompat_feature; + mutex_lock(&c->sb_lock); if (!ret) { - mutex_lock(&c->sb_lock); SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); + } else { + darray_for_each(c->incompat_versions_requested, i) + if (version == *i) + goto out; + + darray_push(&c->incompat_versions_requested, version); + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " currently not enabled"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); } +out: + mutex_unlock(&c->sb_lock); + return ret; } @@ -1086,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); - return -BCH_ERR_sb_not_downgraded; + ret = -BCH_ERR_sb_not_downgraded; + goto out; } darray_for_each(online_devices, ca) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a58edde43bee3b..84a37d971ffdae 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -70,14 +70,10 @@ #include #include #include -#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kent Overstreet "); MODULE_DESCRIPTION("bcachefs filesystem"); -MODULE_SOFTDEP("pre: chacha20"); -MODULE_SOFTDEP("pre: poly1305"); -MODULE_SOFTDEP("pre: xxhash"); const char * const bch2_fs_flag_strs[] = { #define x(n) #n, @@ -381,6 +377,11 @@ void bch2_fs_read_only(struct bch_fs *c) bch_verbose(c, "marking filesystem clean"); bch2_fs_mark_clean(c); } else { + /* Make sure 
error counts/counters are persisted */ + mutex_lock(&c->sb_lock); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + bch_verbose(c, "done going read-only, filesystem not clean"); } } @@ -422,32 +423,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) return ret; } -static int bch2_fs_read_write_late(struct bch_fs *c) -{ - int ret; - - /* - * Data move operations can't run until after check_snapshots has - * completed, and bch2_snapshot_is_ancestor() is available. - * - * Ideally we'd start copygc/rebalance earlier instead of waiting for - * all of recovery/fsck to complete: - */ - ret = bch2_copygc_start(c); - if (ret) { - bch_err(c, "error starting copygc thread"); - return ret; - } - - ret = bch2_rebalance_start(c); - if (ret) { - bch_err(c, "error starting rebalance thread"); - return ret; - } - - return 0; -} - static int __bch2_fs_read_write(struct bch_fs *c, bool early) { int ret; @@ -470,29 +445,28 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) clear_bit(BCH_FS_clean_shutdown, &c->flags); + __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { + bch2_dev_allocator_add(c, ca); + percpu_ref_reinit(&ca->io_ref[WRITE]); + } + bch2_recalc_capacity(c); + /* * First journal write must be a flush write: after a clean shutdown we * don't read the journal, so the first journal write may end up * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ + spin_lock(&c->journal.lock); set_bit(JOURNAL_need_flush_write, &c->journal.flags); set_bit(JOURNAL_running, &c->journal.flags); - - __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) { - bch2_dev_allocator_add(c, ca); - percpu_ref_reinit(&ca->io_ref[WRITE]); - } - bch2_recalc_capacity(c); + bch2_journal_space_available(&c->journal); + spin_unlock(&c->journal.lock); ret = bch2_fs_mark_dirty(c); if (ret) goto err; - spin_lock(&c->journal.lock); - 
bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); - ret = bch2_journal_reclaim_start(&c->journal); if (ret) goto err; @@ -508,10 +482,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) atomic_long_inc(&c->writes[i]); } #endif - if (!early) { - ret = bch2_fs_read_write_late(c); - if (ret) - goto err; + + ret = bch2_copygc_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting copygc thread"); + goto err; + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err_msg(c, ret, "error starting rebalance thread"); + goto err; } bch2_do_discards(c); @@ -555,8 +536,13 @@ static void __bch2_fs_free(struct bch_fs *c) for (unsigned i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_exit(&c->times[i]); +#ifdef CONFIG_UNICODE + utf8_unload(c->cf_encoding); +#endif + bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_free_fsck_errs(c); bch2_fs_accounting_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); @@ -593,6 +579,7 @@ static void __bch2_fs_free(struct bch_fs *c) free_percpu(c->online_reserved); } + darray_exit(&c->incompat_versions_requested); darray_exit(&c->btree_roots_extra); free_percpu(c->pcpu); free_percpu(c->usage); @@ -845,25 +832,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; -#ifdef CONFIG_UNICODE - /* Default encoding until we can potentially have more as an option. */ - c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); - if (IS_ERR(c->cf_encoding)) { - printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. 
Version: %u.%u.%u", - unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), - unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); - ret = -EINVAL; - goto err; - } -#else - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif - pr_uuid(&name, c->sb.user_uuid.b); ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0; if (ret) @@ -963,6 +931,29 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#ifdef CONFIG_UNICODE + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } + bch_info(c, "Using encoding defined by superblock: utf8-%u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; @@ -1002,12 +993,6 @@ static void print_mount_opts(struct bch_fs *c) prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); - if (c->opts.read_only) { - prt_str(&p, " opts="); - first = false; - prt_printf(&p, "ro"); - } - for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); @@ -1023,10 +1008,49 @@ static void 
print_mount_opts(struct bch_fs *c) bch2_opt_to_text(&p, c, c->disk_sb.sb, opt, v, OPT_SHOW_MOUNT_STYLE); } + if (c->sb.version_incompat_allowed != c->sb.version) { + prt_printf(&p, "\n allowing incompatible features above "); + bch2_version_to_text(&p, c->sb.version_incompat_allowed); + } + bch_info(c, "%s", p.buf); printbuf_exit(&p); } +static bool bch2_fs_may_start(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i, flags = 0; + + if (c->opts.very_degraded) + flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; + + if (c->opts.degraded) + flags |= BCH_FORCE_IF_DEGRADED; + + if (!c->opts.degraded && + !c->opts.very_degraded) { + mutex_lock(&c->sb_lock); + + for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { + if (!bch2_member_exists(c->disk_sb.sb, i)) + continue; + + ca = bch2_dev_locked(c, i); + + if (!bch2_dev_is_online(ca) && + (ca->mi.state == BCH_MEMBER_STATE_rw || + ca->mi.state == BCH_MEMBER_STATE_ro)) { + mutex_unlock(&c->sb_lock); + return false; + } + } + mutex_unlock(&c->sb_lock); + } + + return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); +} + int bch2_fs_start(struct bch_fs *c) { time64_t now = ktime_get_real_seconds(); @@ -1034,6 +1058,9 @@ int bch2_fs_start(struct bch_fs *c) print_mount_opts(c); + if (!bch2_fs_may_start(c)) + return -BCH_ERR_insufficient_devices_to_start; + down_write(&c->state_lock); mutex_lock(&c->sb_lock); @@ -1086,13 +1113,10 @@ int bch2_fs_start(struct bch_fs *c) wake_up(&c->ro_ref_wait); down_write(&c->state_lock); - if (c->opts.read_only) { + if (c->opts.read_only) bch2_fs_read_only(c); - } else { - ret = !test_bit(BCH_FS_rw, &c->flags) - ? 
bch2_fs_read_write(c) - : bch2_fs_read_write_late(c); - } + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); up_write(&c->state_lock); err: @@ -1504,7 +1528,7 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) printbuf_exit(&name); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return 0; } @@ -1563,40 +1587,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, } } -static bool bch2_fs_may_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i, flags = 0; - - if (c->opts.very_degraded) - flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST; - - if (c->opts.degraded) - flags |= BCH_FORCE_IF_DEGRADED; - - if (!c->opts.degraded && - !c->opts.very_degraded) { - mutex_lock(&c->sb_lock); - - for (i = 0; i < c->disk_sb.sb->nr_devices; i++) { - if (!bch2_member_exists(c->disk_sb.sb, i)) - continue; - - ca = bch2_dev_locked(c, i); - - if (!bch2_dev_is_online(ca) && - (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); - return false; - } - } - mutex_unlock(&c->sb_lock); - } - - return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true); -} - static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca) { bch2_dev_io_ref_stop(ca, WRITE); @@ -1650,7 +1640,7 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); return ret; } @@ -1767,7 +1757,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) up_write(&c->state_lock); return 0; err: - if (ca->mi.state == BCH_MEMBER_STATE_rw && + if (test_bit(BCH_FS_rw, &c->flags) && + ca->mi.state == BCH_MEMBER_STATE_rw && !percpu_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); up_write(&c->state_lock); @@ -2231,11 +2222,6 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, } up_write(&c->state_lock); - if 
(!bch2_fs_may_start(c)) { - ret = -BCH_ERR_insufficient_devices_to_start; - goto err_print; - } - if (!c->opts.nostart) { ret = bch2_fs_start(c); if (ret) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e5f003c29369e0..82ee333ddd216c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -654,11 +654,10 @@ static ssize_t sysfs_opt_store(struct bch_fs *c, bch2_set_rebalance_needs_scan(c, 0); if (v && id == Opt_rebalance_enabled) - rebalance_wakeup(c); + bch2_rebalance_wakeup(c); - if (v && id == Opt_copygc_enabled && - c->copygc_thread) - wake_up_process(c->copygc_thread); + if (v && id == Opt_copygc_enabled) + bch2_copygc_wakeup(c); if (id == Opt_discard && !ca) { mutex_lock(&c->sb_lock); diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index c265b102267ab2..782a05fe7656b5 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -342,6 +342,8 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) */ static int test_peek_end(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; @@ -362,6 +364,8 @@ static int test_peek_end(struct bch_fs *c, u64 nr) static int test_peek_end_extents(struct bch_fs *c, u64 nr) { + delete_test_keys(c); + struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index dea73bc1cb51cb..314a24d15d4e7c 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki struct stdio_buf *buf = &stdio->output; unsigned long flags; ssize_t ret; - again: + if (stdio->done) + return -EPIPE; + spin_lock_irqsave(&buf->lock, flags); ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args); spin_unlock_irqrestore(&buf->lock, flags); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 
6ba5071ab6ddaa..3e52c7f8ddd225 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -739,4 +739,42 @@ static inline void memcpy_swab(void *_dst, void *_src, size_t len) *--dst = *src++; } +#define set_flags(_map, _in, _out) \ +do { \ + unsigned _i; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & (1 << _i)) \ + (_out) |= _map[_i]; \ + else \ + (_out) &= ~_map[_i]; \ +} while (0) + +#define map_flags(_map, _in) \ +({ \ + unsigned _out = 0; \ + \ + set_flags(_map, _in, _out); \ + _out; \ +}) + +#define map_flags_rev(_map, _in) \ +({ \ + unsigned _i, _out = 0; \ + \ + for (_i = 0; _i < ARRAY_SIZE(_map); _i++) \ + if ((_in) & _map[_i]) { \ + (_out) |= 1 << _i; \ + (_in) &= ~_map[_i]; \ + } \ + (_out); \ +}) + +#define map_defined(_map) \ +({ \ + unsigned _in = ~0; \ + \ + map_flags_rev(_map, _in); \ +}) + #endif /* _BCACHEFS_UTIL_H */ diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 651da52b2cbcc0..e6be32003f3b4c 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -473,6 +473,12 @@ static int inode_opt_set_fn(struct btree_trans *trans, { struct inode_opt_set *s = p; + if (s->id == Inode_opt_casefold) { + int ret = bch2_inode_set_casefold(trans, inode_inum(inode), bi, s->v); + if (ret) + return ret; + } + if (s->defined) bi->bi_fields_set |= 1U << s->id; else diff --git a/fs/bcachefs/xattr_format.h b/fs/bcachefs/xattr_format.h index c7916011ef34d3..67426e33d04e56 100644 --- a/fs/bcachefs/xattr_format.h +++ b/fs/bcachefs/xattr_format.h @@ -13,7 +13,13 @@ struct bch_xattr { __u8 x_type; __u8 x_name_len; __le16 x_val_len; - __u8 x_name[] __counted_by(x_name_len); + /* + * x_name contains the name and value counted by + * x_name_len + x_val_len. The introduction of + * __counted_by(x_name_len) caused a false positive + * detection of an out of bounds write. 
+ */ + __u8 x_name[]; } __packed __aligned(8); #endif /* _BCACHEFS_XATTR_FORMAT_H */ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 584fa89bc8776e..4c1ea6b52a53de 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -830,6 +830,7 @@ static int load_elf_binary(struct linux_binprm *bprm) struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; struct elf_phdr *elf_property_phdata = NULL; unsigned long elf_brk; + bool brk_moved = false; int retval, i; unsigned long elf_entry; unsigned long e_entry; @@ -1097,15 +1098,19 @@ static int load_elf_binary(struct linux_binprm *bprm) /* Calculate any requested alignment. */ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - /* - * There are effectively two types of ET_DYN - * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP) - * and loaders (ET_DYN without PT_INTERP, since they - * _are_ the ELF interpreter). The loaders must - * be loaded away from programs since the program - * may otherwise collide with the loader (especially - * for ET_EXEC which does not have a randomized - * position). For example to handle invocations of + /** + * DOC: PIE handling + * + * There are effectively two types of ET_DYN ELF + * binaries: programs (i.e. PIE: ET_DYN with + * PT_INTERP) and loaders (i.e. static PIE: ET_DYN + * without PT_INTERP, usually the ELF interpreter + * itself). Loaders must be loaded away from programs + * since the program may otherwise collide with the + * loader (especially for ET_EXEC which does not have + * a randomized position). + * + * For example, to handle invocations of * "./ld.so someprog" to test out a new version of * the loader, the subsequent program that the * loader loads must avoid the loader itself, so @@ -1118,6 +1123,9 @@ static int load_elf_binary(struct linux_binprm *bprm) * ELF_ET_DYN_BASE and loaders are loaded into the * independently randomized mmap region (0 load_bias * without MAP_FIXED nor MAP_FIXED_NOREPLACE). 
+ * + * See below for "brk" handling details, which is + * also affected by program vs loader and ASLR. */ if (interpreter) { /* On ET_DYN with PT_INTERP, we do the ASLR. */ @@ -1234,8 +1242,6 @@ static int load_elf_binary(struct linux_binprm *bprm) start_data += load_bias; end_data += load_bias; - current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk); - if (interpreter) { elf_entry = load_elf_interp(interp_elf_ex, interpreter, @@ -1291,27 +1297,44 @@ static int load_elf_binary(struct linux_binprm *bprm) mm->end_data = end_data; mm->start_stack = bprm->p; - if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) { + /** + * DOC: "brk" handling + * + * For architectures with ELF randomization, when executing a + * loader directly (i.e. static PIE: ET_DYN without PT_INTERP), + * move the brk area out of the mmap region and into the unused + * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide + * early with the stack growing down or other regions being put + * into the mmap region by the kernel (e.g. vdso). + * + * In the CONFIG_COMPAT_BRK case, though, everything is turned + * off because we're not allowed to move the brk at all. + */ + if (!IS_ENABLED(CONFIG_COMPAT_BRK) && + IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && + elf_ex->e_type == ET_DYN && !interpreter) { + elf_brk = ELF_ET_DYN_BASE; + /* This counts as moving the brk, so let brk(2) know. */ + brk_moved = true; + } + mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk); + + if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) { /* - * For architectures with ELF randomization, when executing - * a loader directly (i.e. no interpreter listed in ELF - * headers), move the brk area out of the mmap region - * (since it grows up, and may collide early with the stack - * growing down), and into the unused ELF_ET_DYN_BASE region. + * If we didn't move the brk to ELF_ET_DYN_BASE (above), + * leave a gap between .bss and brk. 
*/ - if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && - elf_ex->e_type == ET_DYN && !interpreter) { - mm->brk = mm->start_brk = ELF_ET_DYN_BASE; - } else { - /* Otherwise leave a gap between .bss and brk. */ + if (!brk_moved) mm->brk = mm->start_brk = mm->brk + PAGE_SIZE; - } mm->brk = mm->start_brk = arch_randomize_brk(mm); + brk_moved = true; + } + #ifdef compat_brk_randomized + if (brk_moved) current->brk_randomized = 1; #endif - } if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index fa8515598341ec..73a2dfb854c55e 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -3,9 +3,9 @@ config BTRFS_FS tristate "Btrfs filesystem support" select BLK_CGROUP_PUNT_BIO + select CRC32 select CRYPTO select CRYPTO_CRC32C - select LIBCRC32C select CRYPTO_XXHASH select CRYPTO_SHA256 select CRYPTO_BLAKE2B diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7f8ee5d48a4f1..7f11ef559be6e7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -606,7 +606,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio) free_extent_map(em); cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE); - cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS); + cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS); if (!cb->compressed_folios) { ret = BLK_STS_RESOURCE; goto out_free_bio; diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index d6eef4bd9e9d45..de23c4b3515e58 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -94,8 +94,6 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, struct btrfs_block_group *block_group) { lockdep_assert_held(&discard_ctl->lock); - if (!btrfs_run_discard_work(discard_ctl)) - return; if (list_empty(&block_group->discard_list) || block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { @@ -118,6 +116,9 @@ static void 
add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, if (!btrfs_is_block_group_data_only(block_group)) return; + if (!btrfs_run_discard_work(discard_ctl)) + return; + spin_lock(&discard_ctl->lock); __add_to_discard_list(discard_ctl, block_group); spin_unlock(&discard_ctl->lock); @@ -244,6 +245,18 @@ static struct btrfs_block_group *peek_discard_list( block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) { __add_to_discard_list(discard_ctl, block_group); + /* + * The block group must have been moved to other + * discard list even if discard was disabled in + * the meantime or a transaction abort happened, + * otherwise we can end up in an infinite loop, + * always jumping into the 'again' label and + * keep getting this block group over and over + * in case there are no other block groups in + * the discard lists. + */ + ASSERT(block_group->discard_index != + BTRFS_DISCARD_INDEX_UNUSED); } else { list_del_init(&block_group->discard_list); btrfs_put_block_group(block_group); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3dd555db3d328a..aa58e0663a5d7b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3853,7 +3853,6 @@ static int write_dev_supers(struct btrfs_device *device, atomic_inc(&device->sb_write_errors); continue; } - ASSERT(folio_order(folio) == 0); offset = offset_in_folio(folio, bytenr); disk_super = folio_address(folio) + offset; @@ -3926,7 +3925,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) /* If the folio has been removed, then we know it completed. */ if (IS_ERR(folio)) continue; - ASSERT(folio_order(folio) == 0); /* Folio will be unlocked once the write completes. 
*/ folio_wait_locked(folio); diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 13de6af279e526..b5b44ea91f9996 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1252,8 +1252,11 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (!prealloc) goto search_again; ret = split_state(tree, state, prealloc, end + 1); - if (ret) + if (ret) { extent_io_tree_panic(tree, state, "split", ret); + prealloc = NULL; + goto out; + } set_state_bits(tree, prealloc, bits, changeset); cache_state(prealloc, cached_state); @@ -1456,6 +1459,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, if (IS_ERR(inserted_state)) { ret = PTR_ERR(inserted_state); extent_io_tree_panic(tree, prealloc, "insert", ret); + goto out; } cache_state(inserted_state, cached_state); if (inserted_state == prealloc) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 197f5e51c47446..13bdd60da3c738 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2047,7 +2047,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc) subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock(&folio->mapping->i_private_lock); - bit_start++; + bit_start += sectors_per_node; continue; } @@ -3508,8 +3508,8 @@ static void btree_clear_folio_dirty_tag(struct folio *folio) ASSERT(folio_test_locked(folio)); xa_lock_irq(&folio->mapping->i_pages); if (!folio_test_dirty(folio)) - __xa_clear_mark(&folio->mapping->i_pages, - folio_index(folio), PAGECACHE_TAG_DIRTY); + __xa_clear_mark(&folio->mapping->i_pages, folio->index, + PAGECACHE_TAG_DIRTY); xa_unlock_irq(&folio->mapping->i_pages); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2e261892c7bc79..f5b28b5c4908b2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -298,6 +298,8 @@ static inline int __pure num_extent_pages(const struct extent_buffer *eb) */ static inline int __pure 
num_extent_folios(const struct extent_buffer *eb) { + if (!eb->folios[0]) + return 0; if (folio_order(eb->folios[0])) return 1; return num_extent_pages(eb); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 262a707d899064..22455fbcb29eba 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1862,7 +1862,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) if (reserved_space < fsize) { end = page_start + reserved_space - 1; btrfs_delalloc_release_space(BTRFS_I(inode), - data_reserved, page_start, + data_reserved, end + 1, fsize - reserved_space, true); } } @@ -2104,15 +2104,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * will always return true. * So here we need to do extra page alignment for * filemap_range_has_page(). + * + * And do not decrease page_lockend right now, as it can be 0. */ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); - const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; + const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE); while (1) { truncate_pagecache_range(inode, lockstart, lockend); lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); + /* The same page or adjacent pages. */ + if (page_lockend <= page_lockstart) + break; /* * We can't have ordered extents in the range, nor dirty/writeback * pages, because we have locked the inode's VFS lock in exclusive @@ -2124,7 +2129,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * we do, unlock the range and retry. 
*/ if (!filemap_range_has_page(inode->i_mapping, page_lockstart, - page_lockend)) + page_lockend - 1)) break; unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index bcca43046064b8..7baa2ed45198f9 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -300,6 +300,7 @@ enum { #define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) +#define BTRFS_WARNING_COMMIT_INTERVAL (300) #define BTRFS_DEFAULT_MAX_INLINE (2048) struct btrfs_dev_replace { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cc67d1a2d61109..8a3f44302788cd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1109,6 +1109,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, struct extent_state *cached = NULL; struct extent_map *em; int ret = 0; + bool free_pages = false; u64 start = async_extent->start; u64 end = async_extent->start + async_extent->ram_size - 1; @@ -1129,7 +1130,10 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, } if (async_extent->compress_type == BTRFS_COMPRESS_NONE) { + ASSERT(!async_extent->folios); + ASSERT(async_extent->nr_folios == 0); submit_uncompressed_range(inode, async_extent, locked_folio); + free_pages = true; goto done; } @@ -1145,6 +1149,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, * fall back to uncompressed. 
*/ submit_uncompressed_range(inode, async_extent, locked_folio); + free_pages = true; goto done; } @@ -1186,6 +1191,8 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, done: if (async_chunk->blkcg_css) kthread_associate_blkcg(NULL); + if (free_pages) + free_async_extent_pages(async_extent); kfree(async_extent); return; @@ -2129,12 +2136,13 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode, /* * If the found extent starts after requested offset, then - * adjust extent_end to be right before this extent begins + * adjust cur_offset to be right before this extent begins. */ if (found_key.offset > cur_offset) { - extent_end = found_key.offset; - extent_type = 0; - goto must_cow; + if (cow_start == (u64)-1) + cow_start = cur_offset; + cur_offset = found_key.offset; + goto next_slot; } /* @@ -4841,8 +4849,11 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len, folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask); if (IS_ERR(folio)) { - btrfs_delalloc_release_space(inode, data_reserved, block_start, - blocksize, true); + if (only_release_metadata) + btrfs_delalloc_release_metadata(inode, blocksize, true); + else + btrfs_delalloc_release_space(inode, data_reserved, + block_start, blocksize, true); btrfs_delalloc_release_extents(inode, blocksize); ret = -ENOMEM; goto out; @@ -5681,8 +5692,10 @@ struct btrfs_inode *btrfs_iget(u64 ino, struct btrfs_root *root) return inode; path = btrfs_alloc_path(); - if (!path) + if (!path) { + iget_failed(&inode->vfs_inode); return ERR_PTR(-ENOMEM); + } ret = btrfs_read_locked_inode(inode, path); btrfs_free_path(path); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a13d81bb56a089..63aeacc5494574 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4902,6 +4902,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state, 
&disk_bytenr, &disk_io_size); + if (ret == -EAGAIN) + goto out_acct; if (ret < 0 && ret != -EIOCBQUEUED) goto out_free; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f948f4f6431c6c..e17bcb03459519 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3803,7 +3803,7 @@ static noinline_for_stack struct inode *create_reloc_inode( if (ret) { if (inode) iput(&inode->vfs_inode); - inode = ERR_PTR(ret); + return ERR_PTR(ret); } return &inode->vfs_inode; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2c5edcee94502b..4c525a0408125d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -153,12 +153,14 @@ struct scrub_stripe { unsigned int init_nr_io_errors; unsigned int init_nr_csum_errors; unsigned int init_nr_meta_errors; + unsigned int init_nr_meta_gen_errors; /* * The following error bitmaps are all for the current status. * Every time we submit a new read, these bitmaps may be updated. * - * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap; + * error_bitmap = io_error_bitmap | csum_error_bitmap | + * meta_error_bitmap | meta_generation_bitmap; * * IO and csum errors can happen for both metadata and data. */ @@ -166,6 +168,7 @@ struct scrub_stripe { unsigned long io_error_bitmap; unsigned long csum_error_bitmap; unsigned long meta_error_bitmap; + unsigned long meta_gen_error_bitmap; /* For writeback (repair or replace) error reporting. 
*/ unsigned long write_error_bitmap; @@ -616,7 +619,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr memcpy(on_disk_csum, header->csum, fs_info->csum_size); if (logical != btrfs_stack_header_bytenr(header)) { - bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); + bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad bytenr, has %llu want %llu", @@ -672,7 +675,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr } if (stripe->sectors[sector_nr].generation != btrfs_stack_header_generation(header)) { - bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); + bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree); bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, "tree block %llu mirror %u has bad generation, has %llu want %llu", @@ -684,6 +687,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree); bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree); + bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree); } static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) @@ -972,8 +976,22 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx, if (__ratelimit(&rs) && dev) scrub_print_common_warning("header error", dev, false, stripe->logical, physical); + if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap)) + if (__ratelimit(&rs) && dev) + scrub_print_common_warning("generation error", dev, false, + stripe->logical, physical); } + /* Update the device stats. 
*/ + for (int i = 0; i < stripe->init_nr_io_errors; i++) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS); + for (int i = 0; i < stripe->init_nr_csum_errors; i++) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS); + /* Generation mismatch error is based on each metadata, not each block. */ + for (int i = 0; i < stripe->init_nr_meta_gen_errors; + i += (fs_info->nodesize >> fs_info->sectorsize_bits)) + btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS); + spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed += stripe->nr_data_extents; sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents; @@ -982,7 +1000,8 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx, sctx->stat.no_csum += nr_nodatacsum_sectors; sctx->stat.read_errors += stripe->init_nr_io_errors; sctx->stat.csum_errors += stripe->init_nr_csum_errors; - sctx->stat.verify_errors += stripe->init_nr_meta_errors; + sctx->stat.verify_errors += stripe->init_nr_meta_errors + + stripe->init_nr_meta_gen_errors; sctx->stat.uncorrectable_errors += bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); sctx->stat.corrected_errors += nr_repaired_sectors; @@ -1028,6 +1047,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) stripe->nr_sectors); stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors); + stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap, + stripe->nr_sectors); if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) goto out; @@ -1142,6 +1163,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio) bitmap_set(&stripe->write_error_bitmap, sector_nr, bio_size >> fs_info->sectorsize_bits); spin_unlock_irqrestore(&stripe->write_error_lock, flags); + for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++) + btrfs_dev_stat_inc_and_print(stripe->dev, + BTRFS_DEV_STAT_WRITE_ERRS); } bio_put(&bbio->bio); @@ 
-1508,10 +1532,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) stripe->init_nr_io_errors = 0; stripe->init_nr_csum_errors = 0; stripe->init_nr_meta_errors = 0; + stripe->init_nr_meta_gen_errors = 0; stripe->error_bitmap = 0; stripe->io_error_bitmap = 0; stripe->csum_error_bitmap = 0; stripe->meta_error_bitmap = 0; + stripe->meta_gen_error_bitmap = 0; } /* @@ -1541,8 +1567,8 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, u64 extent_gen; int ret; - if (unlikely(!extent_root)) { - btrfs_err(fs_info, "no valid extent root for scrub"); + if (unlikely(!extent_root || !csum_root)) { + btrfs_err(fs_info, "no valid extent or csum root for scrub"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 11dbd7be6a3b3b..c0a0b8b063d085 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -204,7 +204,7 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, btrfs_blocks_per_folio(fs_info, folio); \ \ btrfs_subpage_assert(fs_info, folio, start, len); \ - __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ + __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ __start_bit; \ }) @@ -666,7 +666,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, btrfs_blocks_per_folio(fs_info, folio); \ const struct btrfs_subpage *subpage = folio_get_private(folio); \ \ - ASSERT(blocks_per_folio < BITS_PER_LONG); \ + ASSERT(blocks_per_folio <= BITS_PER_LONG); \ *dst = bitmap_read(subpage->bitmaps, \ blocks_per_folio * btrfs_bitmap_nr_##name, \ blocks_per_folio); \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 40709e2a44fcec..7310e2fa852621 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -569,6 +569,10 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case 
Opt_commit_interval: ctx->commit_interval = result.uint_32; + if (ctx->commit_interval > BTRFS_WARNING_COMMIT_INTERVAL) { + btrfs_warn(NULL, "excessive commit interval %u, use with care", + ctx->commit_interval); + } if (ctx->commit_interval == 0) ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; break; @@ -1139,8 +1143,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) subvol_name = btrfs_get_subvol_name_from_objectid(info, btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); if (!IS_ERR(subvol_name)) { - seq_puts(seq, ",subvol="); - seq_escape(seq, subvol_name, " \t\n\\"); + seq_show_option(seq, "subvol", subvol_name); kfree(subvol_name); } return 0; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43979891f7c895..2b66a6130269a3 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -2235,7 +2235,7 @@ int btrfs_verify_level_key(struct extent_buffer *eb, btrfs_err(fs_info, "tree level mismatch detected, bytenr=%llu level expected=%u has=%u", eb->start, check->level, found_level); - return -EIO; + return -EUCLEAN; } if (!check->has_first_key) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 90dc094cfa5e5a..f5af11565b8760 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6583,6 +6583,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_log_get_delayed_items(inode, &delayed_ins_list, &delayed_del_list); + /* + * If we are fsyncing a file with 0 hard links, then commit the delayed + * inode because the last inode ref (or extref) item may still be in the + * subvolume tree and if we log it the file will still exist after a log + * replay. So commit the delayed inode to delete that last ref and we + * skip logging it. 
+ */ + if (inode->vfs_inode.i_nlink == 0) { + ret = btrfs_commit_inode_delayed_inode(inode); + if (ret) + goto out_unlock; + } + ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, inode_only, ctx, @@ -7051,14 +7064,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (btrfs_root_generation(&root->root_item) == trans->transid) return BTRFS_LOG_FORCE_COMMIT; - /* - * Skip already logged inodes or inodes corresponding to tmpfiles - * (since logging them is pointless, a link count of 0 means they - * will never be accessible). - */ - if ((btrfs_inode_in_log(inode, trans->transid) && - list_empty(&ctx->ordered_extents)) || - inode->vfs_inode.i_nlink == 0) + /* Skip already logged inodes and without new extents. */ + if (btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) return BTRFS_NO_LOG_SYNC; ret = start_log_trans(trans, root, ctx); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c8c21c55be53bd..8e6b6fed7429a9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -733,82 +733,6 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) return has_metadata_uuid ? sb->metadata_uuid : sb->fsid; } -/* - * We can have very weird soft links passed in. - * One example is "/proc/self/fd/", which can be a soft link to - * a block device. - * - * But it's never a good idea to use those weird names. - * Here we check if the path (not following symlinks) is a good one inside - * "/dev/". - */ -static bool is_good_dev_path(const char *dev_path) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - bool is_good = false; - int ret; - - if (!dev_path) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) - goto out; - - /* - * Do not follow soft link, just check if the original path is inside - * "/dev/". 
- */ - ret = kern_path(dev_path, 0, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) - goto out; - if (strncmp(resolved_path, "/dev/", strlen("/dev/"))) - goto out; - is_good = true; -out: - kfree(path_buf); - path_put(&path); - return is_good; -} - -static int get_canonical_dev_path(const char *dev_path, char *canonical) -{ - struct path path = { .mnt = NULL, .dentry = NULL }; - char *path_buf = NULL; - char *resolved_path; - int ret; - - if (!dev_path) { - ret = -EINVAL; - goto out; - } - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (!path_buf) { - ret = -ENOMEM; - goto out; - } - - ret = kern_path(dev_path, LOOKUP_FOLLOW, &path); - if (ret) - goto out; - resolved_path = d_path(&path, path_buf, PATH_MAX); - if (IS_ERR(resolved_path)) { - ret = PTR_ERR(resolved_path); - goto out; - } - ret = strscpy(canonical, resolved_path, PATH_MAX); -out: - kfree(path_buf); - path_put(&path); - return ret; -} - static bool is_same_device(struct btrfs_device *device, const char *new_path) { struct path old = { .mnt = NULL, .dentry = NULL }; @@ -1513,23 +1437,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, bool new_device_added = false; struct btrfs_device *device = NULL; struct file *bdev_file; - char *canonical_path = NULL; u64 bytenr; dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); - if (!is_good_dev_path(path)) { - canonical_path = kmalloc(PATH_MAX, GFP_KERNEL); - if (canonical_path) { - ret = get_canonical_dev_path(path, canonical_path); - if (ret < 0) { - kfree(canonical_path); - canonical_path = NULL; - } - } - } /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, @@ -1574,8 +1487,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, goto free_disk_super; } - device = device_list_add(canonical_path ? 
: path, disk_super, - &new_device_added); + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); @@ -1584,7 +1496,6 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, error_bdev_put: fput(bdev_file); - kfree(canonical_path); return device; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index fb8b8b29c169ca..4a3e02b49f2957 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1277,7 +1277,7 @@ struct zone_info { static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct zone_info *info, unsigned long *active, - struct btrfs_chunk_map *map) + struct btrfs_chunk_map *map, bool new) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct btrfs_device *device; @@ -1307,6 +1307,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, return 0; } + ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical)); + /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical); @@ -1319,6 +1321,18 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, * to determine the allocation offset within the zone. 
*/ WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); + + if (new) { + sector_t capacity; + + capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT); + up_read(&dev_replace->rwsem); + info->alloc_offset = 0; + info->capacity = capacity << SECTOR_SHIFT; + + return 0; + } + nofs_flag = memalloc_nofs_save(); ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); @@ -1588,7 +1602,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } for (i = 0; i < map->num_stripes; i++) { - ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); + ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map, new); if (ret) goto out; @@ -1659,7 +1673,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) * stripe. */ cache->alloc_offset = cache->zone_capacity; - ret = 0; } out: diff --git a/fs/buffer.c b/fs/buffer.c index c7abb4a029dc84..dd8709e05225ed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -176,18 +176,8 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_write_sync); -/* - * Various filesystems appear to want __find_get_block to be non-blocking. - * But it's the page lock which protects the buffers. To get around this, - * we get exclusion from try_to_free_buffers with the blockdev mapping's - * i_private_lock. - * - * Hack idea: for the blockdev mapping, i_private_lock contention - * may be quite high. This code could TryLock the page, and if that - * succeeds, there is no need to take i_private_lock. 
- */ static struct buffer_head * -__find_get_block_slow(struct block_device *bdev, sector_t block) +__find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { struct address_space *bd_mapping = bdev->bd_mapping; const int blkbits = bd_mapping->host->i_blkbits; @@ -204,10 +194,28 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) if (IS_ERR(folio)) goto out; - spin_lock(&bd_mapping->i_private_lock); + /* + * Folio lock protects the buffers. Callers that cannot block + * will fallback to serializing vs try_to_free_buffers() via + * the i_private_lock. + */ + if (atomic) + spin_lock(&bd_mapping->i_private_lock); + else + folio_lock(folio); + head = folio_buffers(folio); if (!head) goto out_unlock; + /* + * Upon a noref migration, the folio lock serializes here; + * otherwise bail. + */ + if (test_bit_acquire(BH_Migrate, &head->b_state)) { + WARN_ON(!atomic); + goto out_unlock; + } + bh = head; do { if (!buffer_mapped(bh)) @@ -236,7 +244,10 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) 1 << blkbits); } out_unlock: - spin_unlock(&bd_mapping->i_private_lock); + if (atomic) + spin_unlock(&bd_mapping->i_private_lock); + else + folio_unlock(folio); folio_put(folio); out: return ret; @@ -286,7 +297,6 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; } struct postprocess_bh_ctx { @@ -411,7 +421,6 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; } /* @@ -656,7 +665,9 @@ EXPORT_SYMBOL(generic_buffers_fsync); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize) { - struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); + struct buffer_head *bh; + + bh = __find_get_block_nonatomic(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) write_dirty_buffer(bh, 
0); @@ -1109,6 +1120,8 @@ static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { + bool blocking = gfpflags_allow_blocking(gfp); + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1124,12 +1137,15 @@ __getblk_slow(struct block_device *bdev, sector_t block, for (;;) { struct buffer_head *bh; - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; - if (!grow_buffers(bdev, block, size, gfp)) return NULL; + + if (blocking) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; } } @@ -1207,10 +1223,8 @@ void mark_buffer_write_io_error(struct buffer_head *bh) /* FIXME: do we need to set this in both places? */ if (bh->b_folio && bh->b_folio->mapping) mapping_set_error(bh->b_folio->mapping, -EIO); - if (bh->b_assoc_map) { + if (bh->b_assoc_map) mapping_set_error(bh->b_assoc_map, -EIO); - errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); - } } EXPORT_SYMBOL(mark_buffer_write_io_error); @@ -1386,16 +1400,18 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) /* * Perform a pagecache lookup for the matching buffer. If it's there, refresh * it in the LRU and mark it as accessed. If it is not present then return - * NULL + * NULL. Atomic context callers may also return NULL if the buffer is being + * migrated; similarly the page is not marked accessed either. 
*/ -struct buffer_head * -__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +static struct buffer_head * +find_get_block_common(struct block_device *bdev, sector_t block, + unsigned size, bool atomic) { struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { /* __find_get_block_slow will mark the page accessed */ - bh = __find_get_block_slow(bdev, block); + bh = __find_get_block_slow(bdev, block, atomic); if (bh) bh_lru_install(bh); } else @@ -1403,8 +1419,23 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) return bh; } + +struct buffer_head * +__find_get_block(struct block_device *bdev, sector_t block, unsigned size) +{ + return find_get_block_common(bdev, block, size, true); +} EXPORT_SYMBOL(__find_get_block); +/* same as __find_get_block() but allows sleeping contexts */ +struct buffer_head * +__find_get_block_nonatomic(struct block_device *bdev, sector_t block, + unsigned size) +{ + return find_get_block_common(bdev, block, size, false); +} +EXPORT_SYMBOL(__find_get_block_nonatomic); + /** * bdev_getblk - Get a buffer_head in a block device's buffer cache. * @bdev: The block device. 
@@ -1422,7 +1453,12 @@ EXPORT_SYMBOL(__find_get_block); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - struct buffer_head *bh = __find_get_block(bdev, block, size); + struct buffer_head *bh; + + if (gfpflags_allow_blocking(gfp)) + bh = __find_get_block_nonatomic(bdev, block, size); + else + bh = __find_get_block(bdev, block, size); might_alloc(gfp); if (bh) @@ -1578,8 +1614,8 @@ static void discard_buffer(struct buffer_head * bh) bh->b_bdev = NULL; b_state = READ_ONCE(bh->b_state); do { - } while (!try_cmpxchg(&bh->b_state, &b_state, - b_state & ~BUFFER_FLAGS_DISCARD)); + } while (!try_cmpxchg_relaxed(&bh->b_state, &b_state, + b_state & ~BUFFER_FLAGS_DISCARD)); unlock_buffer(bh); } @@ -1644,7 +1680,6 @@ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length) filemap_release_folio(folio, 0); out: folio_clear_mappedtodisk(folio); - return; } EXPORT_SYMBOL(block_invalidate_folio); diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 92058ae4348826..c08e4a66ac07a7 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) ret = -ESTALE; } - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); } cachefiles_put_kiocb(ki); @@ -188,7 +188,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres, presubmission_error: if (term_func) - term_func(term_func_priv, ret < 0 ? ret : skipped, false); + term_func(term_func_priv, ret < 0 ? 
ret : skipped); return ret; } @@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); if (ki->term_func) - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); cachefiles_put_kiocb(ki); } @@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object, ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) { if (term_func) - term_func(term_func_priv, -ENOMEM, false); + term_func(term_func_priv, -ENOMEM); return -ENOMEM; } @@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, { if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite); return -ENOBUFS; } @@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) pre = CACHEFILES_DIO_BLOCK_SIZE - off; if (pre >= len) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, len, false); + netfs_write_subrequest_terminated(subreq, len); return; } subreq->transferred += pre; @@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) len -= post; if (len == 0) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, post, false); + netfs_write_subrequest_terminated(subreq, post); return; } iov_iter_truncate(&subreq->io_iter, len); @@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) &start, &len, len, true); cachefiles_end_secure(cache, saved_cred); if (ret < 0) { - netfs_write_subrequest_terminated(subreq, ret, false); + netfs_write_subrequest_terminated(subreq, ret); return; } diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index bf935e25bdbeb8..b48525680e73db 
100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -8,7 +8,7 @@ #include #include "internal.h" -static const char cachefiles_charmap[64] = +static const char cachefiles_charmap[64] __nonstring = "0123456789" /* 0 - 9 */ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index 7249d70e1a43fa..3e7def3d31c16f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,7 +3,7 @@ config CEPH_FS tristate "Ceph distributed file system" depends on INET select CEPH_LIB - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO select NETFS_SUPPORT diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 29be367905a16f..b95c4cb21c13f0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); if (err < subreq->len && + subreq->rreq->origin != NETFS_UNBUFFERED_READ && subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { @@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - if (rreq->origin != NETFS_DIO_READ) + if (rreq->origin != NETFS_UNBUFFERED_READ && + rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); @@ -539,7 +541,7 @@ static void ceph_set_page_fscache(struct page *page) folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ } -static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) +static void ceph_fscache_write_terminated(void *priv, ssize_t error) { struct inode *inode = priv; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 6ac2bd555e8698..06cd2963e41ee0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2367,7 +2367,7 @@ static int fill_fscrypt_truncate(struct inode *inode, /* Try to 
writeback the dirty pagecaches */ if (issued & (CEPH_CAP_FILE_BUFFER)) { - loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; + loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1; ret = filemap_write_and_wait_range(inode->i_mapping, orig_pos, lend); diff --git a/fs/coredump.c b/fs/coredump.c index c33c177a701b3d..d740a04112663c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include @@ -60,6 +62,12 @@ static void free_vma_snapshot(struct coredump_params *cprm); #define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024) /* Define a reasonable max cap */ #define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024) +/* + * File descriptor number for the pidfd for the thread-group leader of + * the coredumping task installed into the usermode helper's file + * descriptor table. + */ +#define COREDUMP_PIDFD_NUMBER 3 static int core_uses_pid; static unsigned int core_pipe_limit; @@ -339,6 +347,27 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, case 'C': err = cn_printf(cn, "%d", cprm->cpu); break; + /* pidfd number */ + case 'F': { + /* + * Installing a pidfd only makes sense if + * we actually spawn a usermode helper. + */ + if (!ispipe) + break; + + /* + * Note that we'll install a pidfd for the + * thread-group leader. We know that task + * linkage hasn't been removed yet and even if + * this @current isn't the actual thread-group + * leader we know that the thread-group leader + * cannot be reaped until @current has exited. + */ + cprm->pid = task_tgid(current); + err = cn_printf(cn, "%d", COREDUMP_PIDFD_NUMBER); + break; + } default: break; } @@ -493,7 +522,7 @@ static void wait_for_dump_helpers(struct file *file) } /* - * umh_pipe_setup + * umh_coredump_setup * helper function to customize the process used * to collect the core in userspace. 
Specifically * it sets up a pipe and installs it as fd 0 (stdin) @@ -503,11 +532,32 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) { struct file *files[2]; struct coredump_params *cp = (struct coredump_params *)info->data; - int err = create_pipe_files(files, 0); + int err; + + if (cp->pid) { + struct file *pidfs_file __free(fput) = NULL; + + pidfs_file = pidfs_alloc_file(cp->pid, 0); + if (IS_ERR(pidfs_file)) + return PTR_ERR(pidfs_file); + + /* + * Usermode helpers are childen of either + * system_unbound_wq or of kthreadd. So we know that + * we're starting off with a clean file descriptor + * table. So we should always be able to use + * COREDUMP_PIDFD_NUMBER as our file descriptor value. + */ + err = replace_fd(COREDUMP_PIDFD_NUMBER, pidfs_file, 0); + if (err < 0) + return err; + } + + err = create_pipe_files(files, 0); if (err) return err; @@ -515,10 +565,13 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) err = replace_fd(0, files[0], 0); fput(files[0]); + if (err < 0) + return err; + /* and disallow core files too */ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; - return err; + return 0; } void do_coredump(const kernel_siginfo_t *siginfo) @@ -593,7 +646,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) } if (cprm.limit == 1) { - /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. 
* * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use @@ -632,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) retval = -ENOMEM; sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, GFP_KERNEL, - umh_pipe_setup, NULL, &cprm); + umh_coredump_setup, NULL, &cprm); if (sub_info) retval = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); diff --git a/fs/dax.c b/fs/dax.c index af5045b0f476e0..f8d8b1afd23244 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry) wq = dax_entry_waitqueue(xas, entry, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); - xas_pause(xas); + xas_reset(xas); xas_unlock_irq(xas); schedule(); finish_wait(wq, &ewait.wait); @@ -396,6 +396,7 @@ static inline unsigned long dax_folio_put(struct folio *folio) order = folio_order(folio); if (!order) return 0; + folio_reset_order(folio); for (i = 0; i < (1UL << order); i++) { struct dev_pagemap *pgmap = page_pgmap(&folio->page); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e4d6eeb29f5b..9c20d78e41f6b4 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -89,12 +89,12 @@ enum { }; static const struct fs_parameter_spec devpts_param_specs[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_s32 ("max", Opt_max), fsparam_u32oct ("mode", Opt_mode), fsparam_flag ("newinstance", Opt_newinstance), fsparam_u32oct ("ptmxmode", Opt_ptmxmode), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 331e49cd1b8d9d..8f68ec49ad897b 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,8 +3,8 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select CRC32 select FS_IOMAP - select LIBCRC32C help EROFS (Enhanced Read-Only File System) is a lightweight read-only file system with modern designs 
(e.g. no buffer heads, inline diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index 9581e9bf8192dc..767fb4acdc93a8 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -56,7 +56,7 @@ struct erofs_super_block { union { __le16 rootnid_2b; /* nid of root directory */ __le16 blocks_hi; /* (48BIT on) blocks count MSB */ - } rb; + } __packed rb; __le64 inos; /* total valid ino # (== f_files - f_favail) */ __le64 epoch; /* base seconds used for compact inodes */ __le32 fixed_nsec; /* fixed nanoseconds for compact inodes */ @@ -148,7 +148,7 @@ union erofs_inode_i_nb { __le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */ __le16 blocks_hi; /* total blocks count MSB */ __le16 startblk_hi; /* starting block number MSB */ -}; +} __packed; /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { @@ -369,9 +369,9 @@ struct z_erofs_map_header { * bit 7 : pack the whole file into packed inode */ __u8 h_clusterbits; - }; + } __packed; __le16 h_extents_hi; /* extent count MSB */ - }; + } __packed; }; enum { diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index bec4b56b382651..60c7cc4c105c67 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -32,6 +32,8 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) ret = 0; } if (rq->bio.bi_end_io) { + if (ret < 0 && !rq->bio.bi_status) + rq->bio.bi_status = errno_to_blk_status(ret); rq->bio.bi_end_io(&rq->bio); } else { bio_for_each_folio_all(fi, &rq->bio) { @@ -148,10 +150,10 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9; attached = 0; } - if (!attached++) - erofs_onlinefolio_split(folio); if (!bio_add_folio(&io->rq->bio, folio, len, cur)) goto io_retry; + if (!attached++) + erofs_onlinefolio_split(folio); io->dev.m_pa += len; } cur += len; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 9c9129bca3460b..34517ca9df9157 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -102,8 
+102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) erofs_fscache_req_put(req); } -static void erofs_fscache_req_end_io(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_io *io = priv; struct erofs_fscache_rq *req = io->private; @@ -180,8 +179,7 @@ struct erofs_fscache_bio { struct bio_vec bvecs[BIO_MAX_VECS]; }; -static void erofs_fscache_bio_endio(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_bio *io = priv; diff --git a/fs/erofs/super.c b/fs/erofs/super.c index cadec6b1b55451..6e57b9cc6ed2e0 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -165,8 +165,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) : bdev_file_open_by_path(dif->path, BLK_OPEN_READ, sb->s_type, NULL); - if (IS_ERR(file)) + if (IS_ERR(file)) { + if (file == ERR_PTR(-ENOTBLK)) + return -EINVAL; return PTR_ERR(file); + } if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), @@ -357,7 +360,6 @@ static void erofs_default_options(struct erofs_sb_info *sbi) enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, Opt_dax, Opt_dax_enum, Opt_device, Opt_fsid, Opt_domain_id, Opt_directio, - Opt_err }; static const struct constant_table erofs_param_cache_strategy[] = { @@ -511,24 +513,52 @@ static int erofs_fc_parse_param(struct fs_context *fc, return 0; } -static struct inode *erofs_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) +static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { - return erofs_iget(sb, ino); + erofs_nid_t nid = EROFS_I(inode)->nid; + int len = parent ? 
6 : 3; + + if (*max_len < len) { + *max_len = len; + return FILEID_INVALID; + } + + fh[0] = (u32)(nid >> 32); + fh[1] = (u32)(nid & 0xffffffff); + fh[2] = inode->i_generation; + + if (parent) { + nid = EROFS_I(parent)->nid; + + fh[3] = (u32)(nid >> 32); + fh[4] = (u32)(nid & 0xffffffff); + fh[5] = parent->i_generation; + } + + *max_len = len; + return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN; } static struct dentry *erofs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if ((fh_type != FILEID_INO64_GEN && + fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[0] << 32) | fid->raw[1])); } static struct dentry *erofs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - erofs_nfs_get_inode); + if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6) + return NULL; + + return d_obtain_alias(erofs_iget(sb, + ((u64)fid->raw[3] << 32) | fid->raw[4])); } static struct dentry *erofs_get_parent(struct dentry *child) @@ -544,7 +574,7 @@ static struct dentry *erofs_get_parent(struct dentry *child) } static const struct export_operations erofs_export_ops = { - .encode_fh = generic_encode_ino32_fh, + .encode_fh = erofs_encode_fh, .fh_to_dentry = erofs_fh_to_dentry, .fh_to_parent = erofs_fh_to_parent, .get_parent = erofs_get_parent, diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 0671184d9cf1ae..b8e6b76c23d5ee 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -79,9 +79,6 @@ struct z_erofs_pcluster { /* L: whether partial decompression or not */ bool partial; - /* L: indicate several pageofs_outs or not */ - bool multibases; - /* L: whether extra buffer allocations are best-effort */ bool besteffort; @@ -725,7 +722,6 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) 
lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; pcl->pclustersize = map->m_plen; - pcl->pageofs_in = pageofs_in; pcl->length = 0; pcl->partial = true; pcl->next = fe->head; @@ -1047,8 +1043,6 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, break; erofs_onlinefolio_split(folio); - if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) - f->pcl->multibases = true; if (f->pcl->length < offset + end - map->m_la) { f->pcl->length = offset + end - map->m_la; f->pcl->pageofs_out = map->m_la & ~PAGE_MASK; @@ -1094,7 +1088,6 @@ struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; - /* pages with the longest decompressed length for deduplication */ struct page **decompressed_pages; /* pages to keep the compressed data */ @@ -1103,6 +1096,8 @@ struct z_erofs_backend { struct list_head decompressed_secondary_bvecs; struct page **pagepool; unsigned int onstack_used, nr_pages; + /* indicate if temporary copies should be preserved for later use */ + bool keepxcpy; }; struct z_erofs_bvec_item { @@ -1113,18 +1108,20 @@ struct z_erofs_bvec_item { static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { + int poff = bvec->offset + be->pcl->pageofs_out; struct z_erofs_bvec_item *item; - unsigned int pgnr; - - if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) && - (bvec->end == PAGE_SIZE || - bvec->offset + bvec->end == be->pcl->length)) { - pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT; - DBG_BUGON(pgnr >= be->nr_pages); - if (!be->decompressed_pages[pgnr]) { - be->decompressed_pages[pgnr] = bvec->page; + struct page **page; + + if (!(poff & ~PAGE_MASK) && (bvec->end == PAGE_SIZE || + bvec->offset + bvec->end == be->pcl->length)) { + DBG_BUGON((poff >> PAGE_SHIFT) >= be->nr_pages); + page = be->decompressed_pages + (poff >> PAGE_SHIFT); + if (!*page) { + *page = bvec->page; 
return; } + } else { + be->keepxcpy = true; } /* (cold path) one pcluster is requested multiple times */ @@ -1290,7 +1287,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) .alg = pcl->algorithmformat, .inplace_io = overlapped, .partial_decoding = pcl->partial, - .fillgaps = pcl->multibases, + .fillgaps = be->keepxcpy, .gfp = pcl->besteffort ? GFP_KERNEL : GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); @@ -1347,7 +1344,6 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) pcl->length = 0; pcl->partial = true; - pcl->multibases = false; pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8de50df05dfe1b..14ea47f954f552 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -559,7 +559,8 @@ static int z_erofs_map_blocks_ext(struct inode *inode, pos += sizeof(__le64); lstart = 0; } else { - lstart = map->m_la >> vi->z_lclusterbits; + lstart = round_down(map->m_la, 1 << vi->z_lclusterbits); + pos += (lstart >> vi->z_lclusterbits) * recsz; pa = EROFS_NULL_ADDR; } @@ -614,7 +615,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; - if (recsz >= offsetof(struct z_erofs_extent, pstart_lo)) + if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; } else if (map->m_plen) { map->m_flags |= EROFS_MAP_MAPPED | diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 100376863a4451..d4dbffdedd08e3 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1996,6 +1996,14 @@ static int ep_try_send_events(struct eventpoll *ep, return res; } +static int ep_schedule_timeout(ktime_t *to) +{ + if (to) + return ktime_after(*to, ktime_get()); + else + return 1; +} + /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. 
@@ -2104,8 +2112,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, write_unlock_irq(&ep->lock); if (!eavail) - timed_out = !schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS); + timed_out = !ep_schedule_timeout(to) || + !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 87ee3a17bd29c9..e8c5525afc67a2 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line, { __le32 *bref = p; unsigned int blk; + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + if (journal && inode == journal->j_inode) return 0; while (bref < p+max) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38bc8d74f4cc23..e7ecc7c8a72969 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -691,7 +691,8 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) if (!bh || !buffer_uptodate(bh)) /* * If the block is not in the buffer cache, then it - * must have been written out. + * must have been written out, or, most unlikely, is + * being migrated - false failure should be OK here. 
*/ goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1dc09ed5d403d8..94c7d2d828a64e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -386,10 +386,11 @@ static int __check_block_validity(struct inode *inode, const char *func, unsigned int line, struct ext4_map_blocks *map) { - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + if (journal && inode == journal->j_inode) return 0; + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, "lblock %lu mapped to illegal pblock %llu " @@ -4724,22 +4725,43 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) inode_set_iversion_queried(inode, val); } -static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) - +static int check_igot_inode(struct inode *inode, ext4_iget_flags flags, + const char *function, unsigned int line) { + const char *err_str; + if (flags & EXT4_IGET_EA_INODE) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "missing EA_INODE flag"; + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + err_str = "missing EA_INODE flag"; + goto error; + } if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || - EXT4_I(inode)->i_file_acl) - return "ea_inode with extended attributes"; + EXT4_I(inode)->i_file_acl) { + err_str = "ea_inode with extended attributes"; + goto error; + } } else { - if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) - return "unexpected EA_INODE flag"; + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + /* + * open_by_handle_at() could provide an old inode number + * that has since been reused for an ea_inode; this does + * not indicate filesystem corruption + */ + if (flags & EXT4_IGET_HANDLE) + return -ESTALE; + err_str = "unexpected EA_INODE flag"; + goto error; + } + } + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) 
{ + err_str = "unexpected bad inode w/o EXT4_IGET_BAD"; + goto error; } - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) - return "unexpected bad inode w/o EXT4_IGET_BAD"; - return NULL; + return 0; + +error: + ext4_error_inode(inode, function, line, 0, err_str); + return -EFSCORRUPTED; } struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, @@ -4751,7 +4773,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, struct ext4_inode_info *ei; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct inode *inode; - const char *err_str; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; loff_t size; @@ -4780,10 +4801,10 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); + ret = check_igot_inode(inode, flags, function, line); + if (ret) { iput(inode); - return ERR_PTR(-EFSCORRUPTED); + return ERR_PTR(ret); } return inode; } @@ -5065,13 +5086,21 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = -EFSCORRUPTED; goto bad_inode; } - if ((err_str = check_igot_inode(inode, flags)) != NULL) { - ext4_error_inode(inode, function, line, 0, err_str); - ret = -EFSCORRUPTED; - goto bad_inode; + ret = check_igot_inode(inode, flags, function, line); + /* + * -ESTALE here means there is nothing inherently wrong with the inode, + * it's just not an inode we can return for an fhandle lookup. 
+ */ + if (ret == -ESTALE) { + brelse(iloc.bh); + unlock_new_inode(inode); + iput(inode); + return ERR_PTR(-ESTALE); } - + if (ret) + goto bad_inode; brelse(iloc.bh); + unlock_new_inode(inode); return inode; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0d523e9fb3d529..1e98c5be4e0ad5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3037,10 +3037,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) unsigned char blocksize_bits = min_t(unsigned char, sb->s_blocksize_bits, EXT4_MAX_BLOCK_LOG_SIZE); - struct sg { - struct ext4_group_info info; - ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; - } sg; + DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters, + EXT4_MAX_BLOCK_LOG_SIZE + 2); group--; if (group == 0) @@ -3048,7 +3046,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + i = (blocksize_bits + 2) * sizeof(sg->bb_counters[0]) + sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); @@ -3068,14 +3066,14 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) * We care only about free space counters in the group info and * these are safe to access even after the buddy has been unloaded */ - memcpy(&sg, grinfo, i); - seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, - sg.info.bb_fragments, sg.info.bb_first_free); + memcpy(sg, grinfo, i); + seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free, + sg->bb_fragments, sg->bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? 
- sg.info.bb_counters[i] : 0); + sg->bb_counters[i] : 0); seq_puts(seq, " ]"); - if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + if (EXT4_MB_GRP_BBITMAP_CORRUPT(sg)) seq_puts(seq, " Block bitmap corrupted!"); seq_putc(seq, '\n'); return 0; @@ -6644,7 +6642,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, for (i = 0; i < count; i++) { cond_resched(); if (is_metadata) - bh = sb_find_get_block(inode->i_sb, block + i); + bh = sb_find_get_block_nonatomic(inode->i_sb, + block + i); ext4_forget(handle, is_metadata, inode, bh, block + i); } } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cb5cb33b1d9155..e9712e64ec8f04 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1971,7 +1971,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, * split it in half by count; each resulting block will have at least * half the space free. */ - if (i > 0) + if (i >= 0) split = count - move; else split = count/2; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 54f89f0ee69b18..b0b8748ae287f4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -53,8 +53,8 @@ bool f2fs_is_cp_guaranteed(struct page *page) struct inode *inode; struct f2fs_sb_info *sbi; - if (!mapping) - return false; + if (fscrypt_is_bounce_page(page)) + return page_private_gcing(fscrypt_pagecache_page(page)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -3966,7 +3966,7 @@ static int check_swap_activate(struct swap_info_struct *sis, if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || nr_pblocks % blks_per_sec || - !f2fs_valid_pinned_area(sbi, pblock)) { + f2fs_is_sequential_zone_area(sbi, pblock)) { bool last_extent = false; not_aligned++; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f1576dc6ec6797..4f34a7d9760a10 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1780,7 +1780,7 @@ struct f2fs_sb_info { unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ bool aligned_blksize; /* all devices has the 
same logical blksize */ - unsigned int first_zoned_segno; /* first zoned segno */ + unsigned int first_seq_zone_segno; /* first segno in sequential zone */ /* For write statistics */ u64 sectors_written_start; @@ -2518,8 +2518,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - sbi->total_valid_block_count -= (block_t)count; + if (unlikely(sbi->total_valid_block_count < count)) { + f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u", + sbi->total_valid_block_count, inode->i_ino, count); + sbi->total_valid_block_count = 0; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count -= count; + } if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, @@ -4622,12 +4628,16 @@ F2FS_FEATURE_FUNCS(readonly, RO); F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED -static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, - block_t blkaddr) +static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, + unsigned int zone) { - unsigned int zno = blkaddr / sbi->blocks_per_blkz; + return test_bit(zone, FDEV(devi).blkz_seq); +} - return test_bit(zno, FDEV(devi).blkz_seq); +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) +{ + return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz); } #endif @@ -4699,15 +4709,31 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, +static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi, block_t blkaddr) { if (f2fs_sb_has_blkzoned(sbi)) { +#ifdef CONFIG_BLK_DEV_ZONED int devi = f2fs_target_device_index(sbi, 
blkaddr); - return !bdev_is_zoned(FDEV(devi).bdev); + if (!bdev_is_zoned(FDEV(devi).bdev)) + return false; + + if (f2fs_is_multi_device(sbi)) { + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + + return f2fs_blkz_is_seq(sbi, devi, blkaddr); +#else + return false; +#endif } - return true; + return false; } static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2b8f9239bede7c..8b5a55b72264dd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2066,6 +2066,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8f8b9b843bdf4b..28137d499f8f65 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -414,7 +414,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(dir); @@ -914,7 +914,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(old_inode)->i_projid))) return -EXDEV; /* @@ -1107,10 +1107,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid)) || - (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + F2FS_I(old_inode)->i_projid)) || + (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) && 
!projid_eq(F2FS_I(old_dir)->i_projid, - F2FS_I(new_dentry->d_inode)->i_projid))) + F2FS_I(new_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(old_dir); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 396ef71f41e359..41ca73622c8d46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -424,7 +424,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi, false); - if (!f2fs_is_checkpoint_ready(sbi)) + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return; /* @@ -2777,7 +2777,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi, if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) segno = 0; else - segno = max(sbi->first_zoned_segno, *newseg); + segno = max(sbi->first_seq_zone_segno, *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } #endif @@ -2789,7 +2789,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi, if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { /* Write only to sequential zones */ if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { - hint = GET_SEC_FROM_SEG(sbi, sbi->first_zoned_segno); + hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno); secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); } else secno = find_first_zero_bit(free_i->free_secmap, @@ -2838,9 +2838,9 @@ static int get_new_segment(struct f2fs_sb_info *sbi, /* set it as dirty segment in free segmap */ f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); - /* no free section in conventional zone */ + /* no free section in conventional device or conventional zone */ if (new_sec && pinning && - !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) { ret = -EAGAIN; goto out_unlock; } @@ -3311,7 +3311,7 @@ int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { f2fs_down_write(&sbi->gc_lock); - err = 
f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), + err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1, true, ZONED_PIN_SEC_REQUIRED_COUNT); f2fs_up_write(&sbi->gc_lock); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 0465dc00b349d2..503f6df690bf2b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -429,7 +429,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -437,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { + if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } @@ -463,22 +462,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); + bool ret; spin_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; - - if (!inmem && IS_CURSEC(sbi, secno)) - goto skip_free; - next = find_next_bit(free_i->free_segmap, - start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } -skip_free: + ret = test_and_clear_bit(segno, free_i->free_segmap); + if (!ret) + goto unlock_out; + + free_i->free_segments++; + + if (!inmem && IS_CURSEC(sbi, secno)) + goto unlock_out; + + /* check large section */ + next = find_next_bit(free_i->free_segmap, + start_segno + 
SEGS_PER_SEC(sbi), start_segno); + if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) + goto unlock_out; + + ret = test_and_clear_bit(secno, free_i->free_secmap); + if (!ret) + goto unlock_out; + + free_i->free_sections++; + + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + +unlock_out: spin_unlock(&free_i->segmap_lock); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f087b2b71c8987..386326f7a440eb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1882,9 +1882,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid = u64_to_fsid(id); #ifdef CONFIG_QUOTA - if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) && sb_has_quota_limits_enabled(sb, PRJQUOTA)) { - f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf); } #endif return 0; @@ -4304,14 +4304,35 @@ static void f2fs_record_error_work(struct work_struct *work) f2fs_record_stop_reason(sbi); } -static inline unsigned int get_first_zoned_segno(struct f2fs_sb_info *sbi) +static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi) { +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int zoneno, total_zones; int devi; - for (devi = 0; devi < sbi->s_ndevs; devi++) - if (bdev_is_zoned(FDEV(devi).bdev)) - return GET_SEGNO(sbi, FDEV(devi).start_blk); - return 0; + if (!f2fs_sb_has_blkzoned(sbi)) + return NULL_SEGNO; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (!bdev_is_zoned(FDEV(devi).bdev)) + continue; + + total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments); + + for (zoneno = 0; zoneno < total_zones; zoneno++) { + unsigned int segs, blks; + + if (!f2fs_zone_is_seq(sbi, devi, zoneno)) + continue; + + segs = GET_SEG_FROM_SEC(sbi, + zoneno * 
sbi->secs_per_zone); + blks = SEGS_TO_BLKS(sbi, segs); + return GET_SEGNO(sbi, FDEV(devi).start_blk + blks); + } + } +#endif + return NULL_SEGNO; } static int f2fs_scan_devices(struct f2fs_sb_info *sbi) @@ -4348,6 +4369,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #endif for (i = 0; i < max_devices; i++) { + if (max_devices == 1) { + FDEV(i).total_segments = + le32_to_cpu(raw_super->segment_count_main); + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).total_segments * + BLKS_PER_SEG(sbi); + } + if (i == 0) FDEV(0).bdev_file = sbi->sb->s_bdev_file; else if (!RDEV(i).path[0]) @@ -4718,7 +4747,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->sectors_written_start = f2fs_get_sectors_written(sbi); /* get segno of first zoned block device */ - sbi->first_zoned_segno = get_first_zoned_segno(sbi); + sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/fs/file.c b/fs/file.c index dc3f7e120e3e5d..3a3146664cf371 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,7 +26,7 @@ #include "internal.h" -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) +static noinline bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) { /* * If the reference count was already in the dead zone, then this diff --git a/fs/filesystems.c b/fs/filesystems.c index 58b9067b2391ce..95e5256821a534 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -156,15 +156,19 @@ static int fs_index(const char __user * __name) static int fs_name(unsigned int index, char __user * buf) { struct file_system_type * tmp; - int len, res; + int len, res = -EINVAL; read_lock(&file_systems_lock); - for (tmp = file_systems; tmp; tmp = tmp->next, index--) - if (index <= 0 && try_module_get(tmp->owner)) + for (tmp = file_systems; tmp; tmp = tmp->next, index--) { + if (index == 0) { + if (try_module_get(tmp->owner)) + res = 0; break; + } + } 
read_unlock(&file_systems_lock); - if (!tmp) - return -EINVAL; + if (res) + return res; /* OK, we got the reference, so we can safely block */ len = strlen(tmp->name) + 1; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 2c7b24cb67adb2..53c2626e90e723 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1669,6 +1669,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) unsigned int virtqueue_size; int err = -EIO; + if (!fsc->source) + return invalf(fsc, "No source specified"); + /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index be7f87a8e11ae1..7bd231d16d4a07 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -4,7 +4,6 @@ config GFS2_FS select BUFFER_HEAD select FS_POSIX_ACL select CRC32 - select LIBCRC32C select QUOTACTL select FS_IOMAP help diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 68fc8af14700d3..eb4270e82ef8ee 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -37,27 +37,6 @@ #include "aops.h" -void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len) -{ - struct buffer_head *head = folio_buffers(folio); - unsigned int bsize = head->b_size; - struct buffer_head *bh; - size_t to = from + len; - size_t start, end; - - for (bh = head, start = 0; bh != head || !start; - bh = bh->b_this_page, start = end) { - end = start + bsize; - if (end <= from) - continue; - if (start >= to) - break; - set_buffer_uptodate(bh); - gfs2_trans_add_data(ip->i_gl, bh); - } -} - /** * gfs2_get_block_noalloc - Fills in a buffer head with details about a block * @inode: The inode @@ -133,11 +112,42 @@ static int __gfs2_jdata_write_folio(struct folio *folio, inode->i_sb->s_blocksize, BIT(BH_Dirty)|BIT(BH_Uptodate)); } - gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio)); + gfs2_trans_add_databufs(ip->i_gl, folio, 0, 
folio_size(folio)); } return gfs2_write_jdata_folio(folio, wbc); } +/** + * gfs2_jdata_writeback - Write jdata folios to the log + * @mapping: The mapping to write + * @wbc: The writeback control + * + * Returns: errno + */ +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + struct folio *folio = NULL; + int error; + + BUG_ON(current->journal_info); + if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE)) + return 0; + + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (folio_test_checked(folio)) { + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); + continue; + } + error = __gfs2_jdata_write_folio(folio, wbc); + } + + return error; +} + /** * gfs2_writepages - Write a bunch of dirty pages back to disk * @mapping: The mapping to write diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h index a10c4334d24893..bf002522a78220 100644 --- a/fs/gfs2/aops.h +++ b/fs/gfs2/aops.h @@ -9,7 +9,6 @@ #include "incore.h" void adjust_fs_space(struct inode *inode); -void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio, - size_t from, size_t len); +int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc); #endif /* __AOPS_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 366516b98b3f31..b81984def58ec3 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -988,7 +988,8 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos, struct gfs2_sbd *sdp = GFS2_SB(inode); if (!gfs2_is_stuffed(ip)) - gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos), + gfs2_trans_add_databufs(ip->i_gl, folio, + offset_in_folio(folio, pos), copied); folio_unlock(folio); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d7220a6fe8f55e..ba25b884169e50 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1166,7 +1166,6 
@@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp) { - struct super_block *s = sdp->sd_vfs; struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type, .ln_sbd = sdp }; @@ -1229,7 +1228,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping = gfs2_glock2aspace(gl); if (mapping) { mapping->a_ops = &gfs2_meta_aops; - mapping->host = s->s_bdev->bd_mapping->host; + mapping->host = sdp->sd_inode; mapping->flags = 0; mapping_set_gfp_mask(mapping, GFP_NOFS); mapping->i_private_data = NULL; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index eb4714f299efb6..116efe335c3212 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -168,7 +168,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static int gfs2_rgrp_metasync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *metamapping = &sdp->sd_aspace; + struct address_space *metamapping = gfs2_aspace(sdp); struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK; @@ -225,7 +225,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct address_space *mapping = &sdp->sd_aspace; + struct address_space *mapping = gfs2_aspace(sdp); struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl); const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t start, end; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 74abbd4970f80b..0a41c4e76b3267 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -795,7 +795,7 @@ struct gfs2_sbd { /* Log stuff */ - struct address_space sd_aspace; + struct inode *sd_inode; spinlock_t sd_log_lock; @@ -851,6 +851,13 @@ struct gfs2_sbd { unsigned long sd_glock_dqs_held; }; +#define GFS2_BAD_INO 1 + +static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp) +{ + 
return sdp->sd_inode->i_mapping; +} + static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which) { gl->gl_stats.stats[which]++; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 198a8cbaf5e5ad..8fd81444ffea00 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -439,6 +439,74 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) return error; } +static void gfs2_final_release_pages(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + struct gfs2_glock *gl = ip->i_gl; + + if (unlikely(!gl)) { + /* This can only happen during incomplete inode creation. */ + BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); + return; + } + + truncate_inode_pages(gfs2_glock2aspace(gl), 0); + truncate_inode_pages(&inode->i_data, 0); + + if (atomic_read(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } +} + +int gfs2_dinode_dealloc(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_rgrpd *rgd; + struct gfs2_holder gh; + int error; + + if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { + gfs2_consist_inode(ip); + return -EIO; + } + + gfs2_rindex_update(sdp); + + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); + if (error) + return error; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_qs; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); + if (error) + goto out_qs; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, + sdp->sd_jdesc->jd_blocks); + if (error) + goto out_rg_gunlock; + + gfs2_free_di(rgd, ip); + + gfs2_final_release_pages(ip); + + gfs2_trans_end(sdp); + +out_rg_gunlock: + gfs2_glock_dq_uninit(&gh); +out_qs: + gfs2_quota_unhold(ip); + return error; +} + static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *parent) { @@ -629,10 +697,11 @@ static int 
gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error; + int error, dealloc_error; u32 aflags = 0; unsigned blocks = 1; struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, }; + bool xattr_initialized = false; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; @@ -659,7 +728,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { if (S_ISDIR(inode->i_mode)) { iput(inode); - inode = ERR_PTR(-EISDIR); + inode = NULL; + error = -EISDIR; goto fail_gunlock; } d_instantiate(dentry, inode); @@ -744,11 +814,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) - goto fail_free_inode; + goto fail_dealloc_inode; gfs2_cancel_delete_work(io_gl); io_gl->gl_no_formal_ino = ip->i_no_formal_ino; @@ -772,8 +842,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock3; - if (blocks > 1) + if (blocks > 1) { gfs2_init_xattr(ip); + xattr_initialized = true; + } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -828,6 +900,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: gfs2_glock_put(io_gl); +fail_dealloc_inode: + set_bit(GIF_ALLOC_FAILED, &ip->i_flags); + dealloc_error = 0; + if (ip->i_eattr) + dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized); + clear_nlink(inode); + mark_inode_dirty(inode); + if (!dealloc_error) + dealloc_error = gfs2_dinode_dealloc(ip); + if (dealloc_error) + fs_warn(sdp, "%s: %d\n", __func__, dealloc_error); + ip->i_no_addr = 0; fail_free_inode: if (ip->i_gl) { 
gfs2_glock_put(ip->i_gl); @@ -842,10 +926,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(&d_gh); if (!IS_ERR_OR_NULL(inode)) { - set_bit(GIF_ALLOC_FAILED, &ip->i_flags); - clear_nlink(inode); - if (ip->i_no_addr) - mark_inode_dirty(inode); if (inode->i_state & I_NEW) iget_failed(inode); else diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 9e5e1622d50a60..eafe123617e698 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -92,6 +92,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 no_formal_ino, unsigned int blktype); +int gfs2_dinode_dealloc(struct gfs2_inode *ip); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f9c5089783d24c..115c4ac457e90a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -31,6 +31,7 @@ #include "dir.h" #include "trace_gfs2.h" #include "trans.h" +#include "aops.h" static void gfs2_log_shutdown(struct gfs2_sbd *sdp); @@ -131,7 +132,11 @@ __acquires(&sdp->sd_ail_lock) if (!mapping) continue; spin_unlock(&sdp->sd_ail_lock); - ret = mapping->a_ops->writepages(mapping, wbc); + BUG_ON(GFS2_SB(mapping->host) != sdp); + if (gfs2_is_jdata(GFS2_I(mapping->host))) + ret = gfs2_jdata_writeback(mapping, wbc); + else + ret = mapping->a_ops->writepages(mapping, wbc); if (need_resched()) { blk_finish_plug(plug); cond_resched(); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 198cc705663755..9dc8885c95d072 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -132,7 +132,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) unsigned int bufnum; if (mapping == NULL) - mapping = &sdp->sd_aspace; + mapping = gfs2_aspace(sdp); shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ diff --git a/fs/gfs2/meta_io.h 
b/fs/gfs2/meta_io.h index 831d988c2ceb74..b7c8a6684d0249 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -44,9 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) struct gfs2_glock_aspace *gla = container_of(mapping, struct gfs2_glock_aspace, mapping); return gla->glock.gl_name.ln_sbd; - } else if (mapping->a_ops == &gfs2_rgrp_aops) - return container_of(mapping, struct gfs2_sbd, sd_aspace); - else + } else return inode->i_sb->s_fs_info; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e83d293c361423..4a0f7de41b2b2f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -64,15 +64,17 @@ static void gfs2_tune_init(struct gfs2_tune *gt) void free_sbd(struct gfs2_sbd *sdp) { + struct super_block *sb = sdp->sd_vfs; + if (sdp->sd_lkstats) free_percpu(sdp->sd_lkstats); + sb->s_fs_info = NULL; kfree(sdp); } static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; - struct address_space *mapping; sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); if (!sdp) @@ -109,16 +111,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_sc_inodes_list); - mapping = &sdp->sd_aspace; - - address_space_init_once(mapping); - mapping->a_ops = &gfs2_rgrp_aops; - mapping->host = sb->s_bdev->bd_mapping->host; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); - mapping->i_private_data = NULL; - mapping->writeback_index = 0; - spin_lock_init(&sdp->sd_log_lock); atomic_set(&sdp->sd_log_pinned, 0); INIT_LIST_HEAD(&sdp->sd_log_revokes); @@ -1135,6 +1127,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; + struct address_space *mapping; int error; sdp = init_sbd(sb); @@ -1156,6 +1149,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_flags |= SB_NOSEC; sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; + 
sb->s_d_op = &gfs2_dops; sb->s_export_op = &gfs2_export_ops; sb->s_qcop = &gfs2_quotactl_ops; @@ -1181,9 +1175,21 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) sdp->sd_tune.gt_statfs_quantum = 30; } + /* Set up an address space for metadata writes */ + sdp->sd_inode = new_inode(sb); + error = -ENOMEM; + if (!sdp->sd_inode) + goto fail_free; + sdp->sd_inode->i_ino = GFS2_BAD_INO; + sdp->sd_inode->i_size = OFFSET_MAX; + + mapping = gfs2_aspace(sdp); + mapping->a_ops = &gfs2_rgrp_aops; + mapping_set_gfp_mask(mapping, GFP_NOFS); + error = init_names(sdp, silent); if (error) - goto fail_free; + goto fail_iput; snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); @@ -1192,7 +1198,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0, sdp->sd_fsname); if (!sdp->sd_glock_wq) - goto fail_free; + goto fail_iput; sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname); @@ -1309,9 +1315,10 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) fail_glock_wq: if (sdp->sd_glock_wq) destroy_workqueue(sdp->sd_glock_wq); +fail_iput: + iput(sdp->sd_inode); fail_free: free_sbd(sdp); - sb->s_fs_info = NULL; return error; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 44e5658b896c88..0bd7827e6371e2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -648,7 +648,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); - truncate_inode_pages_final(&sdp->sd_aspace); + iput(sdp->sd_inode); gfs2_delete_debugfs_file(sdp); gfs2_sys_fs_del(sdp); @@ -674,7 +674,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) return sdp->sd_log_error; } -static int gfs2_do_thaw(struct gfs2_sbd *sdp) +static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who) { struct super_block *sb = 
sdp->sd_vfs; int error; @@ -682,7 +682,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp) error = gfs2_freeze_lock_shared(sdp); if (error) goto fail; - error = thaw_super(sb, FREEZE_HOLDER_USERSPACE); + error = thaw_super(sb, who); if (!error) return 0; @@ -710,7 +710,7 @@ void gfs2_freeze_func(struct work_struct *work) gfs2_freeze_unlock(sdp); set_bit(SDF_FROZEN, &sdp->sd_flags); - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE); if (error) goto out; @@ -728,6 +728,7 @@ void gfs2_freeze_func(struct work_struct *work) /** * gfs2_freeze_super - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags * */ @@ -744,7 +745,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) } for (;;) { - error = freeze_super(sb, FREEZE_HOLDER_USERSPACE); + error = freeze_super(sb, who); if (error) { fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); @@ -758,7 +759,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who) break; } - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, who); if (error) goto out; @@ -796,6 +797,7 @@ static int gfs2_freeze_fs(struct super_block *sb) /** * gfs2_thaw_super - reallow writes to the filesystem * @sb: the VFS structure for the filesystem + * @who: freeze flags * */ @@ -814,7 +816,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who) atomic_inc(&sb->s_active); gfs2_freeze_unlock(sdp); - error = gfs2_do_thaw(sdp); + error = gfs2_do_thaw(sdp, who); if (!error) { clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); @@ -1173,74 +1175,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) return 0; } -static void gfs2_final_release_pages(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_glock *gl = ip->i_gl; - - if (unlikely(!gl)) { - /* This can only happen during incomplete inode creation. 
*/ - BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)); - return; - } - - truncate_inode_pages(gfs2_glock2aspace(gl), 0); - truncate_inode_pages(&inode->i_data, 0); - - if (atomic_read(&gl->gl_revokes) == 0) { - clear_bit(GLF_LFLUSH, &gl->gl_flags); - clear_bit(GLF_DIRTY, &gl->gl_flags); - } -} - -static int gfs2_dinode_dealloc(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd; - struct gfs2_holder gh; - int error; - - if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - gfs2_consist_inode(ip); - return -EIO; - } - - gfs2_rindex_update(sdp); - - error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); - if (error) - return error; - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); - if (!rgd) { - gfs2_consist_inode(ip); - error = -EIO; - goto out_qs; - } - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NODE_SCOPE, &gh); - if (error) - goto out_qs; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, - sdp->sd_jdesc->jd_blocks); - if (error) - goto out_rg_gunlock; - - gfs2_free_di(rgd, ip); - - gfs2_final_release_pages(ip); - - gfs2_trans_end(sdp); - -out_rg_gunlock: - gfs2_glock_dq_uninit(&gh); -out_qs: - gfs2_quota_unhold(ip); - return error; -} - /** * gfs2_glock_put_eventually * @gl: The glock to put @@ -1326,9 +1260,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; - if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) - goto should_delete; - if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags)) return EVICT_SHOULD_DEFER_DELETE; @@ -1358,7 +1289,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode, if (inode->i_nlink) return EVICT_SHOULD_SKIP_DELETE; -should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) return gfs2_upgrade_iopen_glock(inode); @@ -1382,7 +1312,7 @@ 
static int evict_unlinked_inode(struct inode *inode) } if (ip->i_eattr) { - ret = gfs2_ea_dealloc(ip); + ret = gfs2_ea_dealloc(ip, true); if (ret) goto out; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ecc699f8d9fcaa..6286183021022a 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -764,7 +764,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) fs_err(sdp, "error %d adding sysfs files\n", error); kobject_put(&sdp->sd_kobj); wait_for_completion(&sdp->sd_kobj_unregister); - sb->s_fs_info = NULL; return error; } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f8ae2c666fd609..075f7e9abe47ca 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -226,6 +226,27 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) unlock_buffer(bh); } +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len) +{ + struct buffer_head *head = folio_buffers(folio); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + size_t to = from + len; + size_t start, end; + + for (bh = head, start = 0; bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from) + continue; + if (start >= to) + break; + set_buffer_uptodate(bh); + gfs2_trans_add_data(gl, bh); + } +} + void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) { diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index f8ce5302280d31..790c55f59e6121 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -42,6 +42,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, void gfs2_trans_end(struct gfs2_sbd *sdp); void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio, + size_t from, size_t len); void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned 
int len); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 17ae5070a90e67..df9c93de94c793 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1383,7 +1383,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) return error; } -static int ea_dealloc_block(struct gfs2_inode *ip) +static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd; @@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) ip->i_eattr = 0; gfs2_add_inode_blocks(&ip->i_inode, -1); - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_meta(ip->i_gl, dibh); @@ -1435,11 +1435,12 @@ static int ea_dealloc_block(struct gfs2_inode *ip) /** * gfs2_ea_dealloc - deallocate the extended attribute fork * @ip: the inode + * @initialized: xattrs have been initialized * * Returns: errno */ -int gfs2_ea_dealloc(struct gfs2_inode *ip) +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized) { int error; @@ -1451,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) { + if (initialized) { error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) goto out_quota; @@ -1463,7 +1464,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) } } - error = ea_dealloc_block(ip); + error = ea_dealloc_block(ip, initialized); out_quota: gfs2_quota_unhold(ip); diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index eb12eb7e37c194..3c9788e0e13750 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -54,7 +54,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name, const void *value, size_t size, int flags, int type); ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size); -int gfs2_ea_dealloc(struct gfs2_inode *ip); +int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized); /* Exported to acl.c */ diff --git 
a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 6add6ebfef8967..cb823a8a6ba960 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 1; + if (key_len > sizeof(hfs_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfs_btree_key)); + pr_err("hfs: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 87974d5e679156..079ea80534f7de 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 2; + if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfsplus_btree_key)); + pr_err("hfsplus: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 31553372b33ace..0ac474888a02e9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -259,7 +259,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, } /* truncate len if we find any trailing uptodate block(s) */ - for ( ; i <= last; i++) { + while (++i <= last) { if (ifs_block_is_uptodate(ifs, i)) { plen -= (last - i + 1) * block_size; last = i - 1; @@ -1675,6 +1675,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, ioend_flags |= IOMAP_IOEND_UNWRITTEN; if (wpc->iomap.flags & IOMAP_F_SHARED) ioend_flags |= IOMAP_IOEND_SHARED; + if (folio_test_dropbehind(folio)) + ioend_flags |= IOMAP_IOEND_DONTCACHE; if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY)) ioend_flags |= IOMAP_IOEND_BOUNDARY; diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 35768a63fb1d23..421d247fae5230 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -180,7 +180,7 @@ static struct dentry 
*isofs_fh_to_parent(struct super_block *sb, return NULL; return isofs_export_iget(sb, - fh_len > 2 ? ifid->parent_block : 0, + fh_len > 3 ? ifid->parent_block : 0, ifid->parent_offset, fh_len > 4 ? ifid->parent_generation : 0); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 0cf0fddbee81c8..1467f6790747d8 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -345,7 +345,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, bh = bh_in; if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh) BUFFER_TRACE(bh, "found on hash"); } @@ -355,7 +356,8 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, /* If there is a different buffer_head lying around in * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + bh2 = __find_get_block_nonatomic(bdev, blocknr, + journal->j_blocksize); if (bh2) { /* ... and it has RevokeValid status... */ if (bh2 != bh && buffer_revokevalid(bh2)) @@ -464,7 +466,8 @@ void jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * state machine will get very upset later on. 
*/ if (need_cancel) { struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + bh2 = __find_get_block_nonatomic(bh->b_bdev, bh->b_blocknr, + bh->b_size); if (bh2) { if (bh2 != bh) clear_buffer_revoked(bh2); @@ -492,9 +495,9 @@ void jbd2_clear_buffer_revoked_flags(journal_t *journal) struct jbd2_revoke_record_s *record; struct buffer_head *bh; record = (struct jbd2_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); + bh = __find_get_block_nonatomic(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); if (bh) { clear_buffer_revoked(bh); __brelse(bh); diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fc70d72c3fe805..43487fa83eaea1 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1580,8 +1580,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn) * invoked before finishing the kernfs operation. Note that while this * function restores the active reference, it doesn't and can't actually * restore the active protection - @kn may already or be in the process of - * being removed. Once kernfs_break_active_protection() is invoked, that - * protection is irreversibly gone for the kernfs operation instance. + * being drained and removed. Once kernfs_break_active_protection() is + * invoked, that protection is irreversibly gone for the kernfs operation + * instance. * * While this function may be called at any point after * kernfs_break_active_protection() is invoked, its most useful location diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 66fe8fe41f0605..a6c692cac61659 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -778,8 +778,9 @@ bool kernfs_should_drain_open_files(struct kernfs_node *kn) /* * @kn being deactivated guarantees that @kn->attr.open can't change * beneath us making the lockless test below safe. + * Callers post kernfs_unbreak_active_protection may be counted in + * kn->active by now, do not WARN_ON because of them. 
*/ - WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); rcu_read_lock(); on = rcu_dereference(kn->attr.open); diff --git a/fs/mount.h b/fs/mount.h index 7aecf2a6047232..ad7173037924a8 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -7,10 +7,6 @@ extern struct list_head notify_list; -typedef __u32 __bitwise mntns_flags_t; - -#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0)) - struct mnt_namespace { struct ns_common ns; struct mount * root; @@ -37,7 +33,6 @@ struct mnt_namespace { struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ refcount_t passive; /* number references not pinning @mounts */ - mntns_flags_t mntns_flags; } __randomize_layout; struct mnt_pcp { diff --git a/fs/namei.c b/fs/namei.c index 360a86ca1f0270..84a0e0b0111c78 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -125,9 +125,9 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) -static inline void initname(struct filename *name) +static inline void initname(struct filename *name, const char __user *uptr) { - name->uptr = NULL; + name->uptr = uptr; name->aname = NULL; atomic_set(&name->refcnt, 1); } @@ -210,7 +210,7 @@ getname_flags(const char __user *filename, int flags) return ERR_PTR(-ENAMETOOLONG); } } - initname(result); + initname(result, filename); audit_getname(result); return result; } @@ -268,7 +268,7 @@ struct filename *getname_kernel(const char * filename) return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); - initname(result); + initname(result, NULL); audit_getname(result); return result; } @@ -1665,27 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name, return dentry; } -/* - * Parent directory has inode locked exclusive. 
This is one - * and only case when ->lookup() gets called on non in-lookup - * dentries - as the matter of fact, this only gets called - * when directory is guaranteed to have no in-lookup children - * at all. - * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. - * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. - */ -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags) +static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name, + struct dentry *base, + unsigned int flags) { - struct dentry *dentry = lookup_dcache(name, base, flags); + struct dentry *dentry; struct dentry *old; - struct inode *dir = base->d_inode; + struct inode *dir; + dentry = lookup_dcache(name, base, flags); if (dentry) - goto found; + return dentry; /* Don't create child dentry for a dead directory. */ + dir = base->d_inode; if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); @@ -1698,7 +1691,24 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, dput(dentry); dentry = old; } -found: + return dentry; +} + +/* + * Parent directory has inode locked exclusive. This is one + * and only case when ->lookup() gets called on non in-lookup + * dentries - as the matter of fact, this only gets called + * when directory is guaranteed to have no in-lookup children + * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. 
+ */ +struct dentry *lookup_one_qstr_excl(const struct qstr *name, + struct dentry *base, unsigned int flags) +{ + struct dentry *dentry; + + dentry = lookup_one_qstr_excl_raw(name, base, flags); if (IS_ERR(dentry)) return dentry; if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { @@ -2742,23 +2752,48 @@ static int filename_parentat(int dfd, struct filename *name, /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { + struct path parent_path __free(path_put) = {}; struct dentry *d; struct qstr last; int type, error; - error = filename_parentat(dfd, name, 0, path, &last, &type); + error = filename_parentat(dfd, name, 0, &parent_path, &last, &type); if (error) return ERR_PTR(error); - if (unlikely(type != LAST_NORM)) { - path_put(path); + if (unlikely(type != LAST_NORM)) return ERR_PTR(-EINVAL); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); + if (IS_ERR(d)) { + inode_unlock(parent_path.dentry->d_inode); + return d; } - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, path->dentry, 0); + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); + return d; +} + +struct dentry *kern_path_locked_negative(const char *name, struct path *path) +{ + struct path parent_path __free(path_put) = {}; + struct filename *filename __free(putname) = getname_kernel(name); + struct dentry *d; + struct qstr last; + int type, error; + + error = filename_parentat(AT_FDCWD, filename, 0, &parent_path, &last, &type); + if (error) + return ERR_PTR(error); + if (unlikely(type != LAST_NORM)) + return ERR_PTR(-EINVAL); + inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl_raw(&last, parent_path.dentry, 0); if (IS_ERR(d)) { - inode_unlock(path->dentry->d_inode); - path_put(path); + 
inode_unlock(parent_path.dentry->d_inode); + return d; } + path->dentry = no_free_ptr(parent_path.dentry); + path->mnt = no_free_ptr(parent_path.mnt); return d; } diff --git a/fs/namespace.c b/fs/namespace.c index 14935a0500a206..d6ac7e533b0212 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -787,15 +787,11 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); - smp_mb(); // see mntput_no_expire() + smp_mb(); // see mntput_no_expire() and do_umount() if (likely(!read_seqretry(&mount_lock, seq))) return 0; - if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { - mnt_add_count(mnt, -1); - return 1; - } lock_mount_hash(); - if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { + if (unlikely(bastard->mnt_flags & (MNT_SYNC_UMOUNT | MNT_DOOMED))) { mnt_add_count(mnt, -1); unlock_mount_hash(); return 1; @@ -1830,6 +1826,8 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock()) + enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, @@ -2046,6 +2044,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, UMOUNT_PROPAGATE); retval = 0; } else { + smp_mb(); // paired with __legitimize_mnt() shrink_submounts(mnt); retval = -EBUSY; if (!propagate_mount_busy(mnt, 2)) { @@ -2383,7 +2382,7 @@ void dissolve_on_fput(struct vfsmount *mnt) return; } - scoped_guard(rwsem_write, &namespace_sem) { + scoped_guard(namespace_lock, &namespace_sem) { ns = m->mnt_ns; if (!must_dissolve(ns)) return; @@ -2425,7 +2424,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -bool has_locked_children(struct mount *mnt, struct dentry *dentry) +static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2439,6 +2438,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +bool has_locked_children(struct mount 
*mnt, struct dentry *dentry) +{ + bool res; + + read_seqlock_excl(&mount_lock); + res = __has_locked_children(mnt, dentry); + read_sequnlock_excl(&mount_lock); + return res; +} + /* * Check that there aren't references to earlier/same mount namespaces in the * specified subtree. Such references can act as pins for mount namespaces @@ -2483,23 +2492,27 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_MNT_UNBINDABLE(old_mnt)) return ERR_PTR(-EINVAL); - if (mnt_has_parent(old_mnt)) { - if (!check_mnt(old_mnt)) - return ERR_PTR(-EINVAL); - } else { - if (!is_mounted(&old_mnt->mnt)) - return ERR_PTR(-EINVAL); - - /* Make sure this isn't something purely kernel internal. */ - if (!is_anon_ns(old_mnt->mnt_ns)) + /* + * Make sure the source mount is acceptable. + * Anything mounted in our mount namespace is allowed. + * Otherwise, it must be the root of an anonymous mount + * namespace, and we need to make sure no namespace + * loops get created. + */ + if (!check_mnt(old_mnt)) { + if (!is_mounted(&old_mnt->mnt) || + !is_anon_ns(old_mnt->mnt_ns) || + mnt_has_parent(old_mnt)) return ERR_PTR(-EINVAL); - /* Make sure we don't create mount namespace loops. 
*/ if (!check_for_nsfs_mounts(old_mnt)) return ERR_PTR(-EINVAL); } - if (has_locked_children(old_mnt, path->dentry)) + if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + if (__has_locked_children(old_mnt, path->dentry)) return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); @@ -2824,56 +2837,62 @@ static struct mountpoint *do_lock_mount(struct path *path, bool beneath) struct vfsmount *mnt = path->mnt; struct dentry *dentry; struct mountpoint *mp = ERR_PTR(-ENOENT); + struct path under = {}; for (;;) { - struct mount *m; + struct mount *m = real_mount(mnt); if (beneath) { - m = real_mount(mnt); + path_put(&under); read_seqlock_excl(&mount_lock); - dentry = dget(m->mnt_mountpoint); + under.mnt = mntget(&m->mnt_parent->mnt); + under.dentry = dget(m->mnt_mountpoint); read_sequnlock_excl(&mount_lock); + dentry = under.dentry; } else { dentry = path->dentry; } inode_lock(dentry->d_inode); - if (unlikely(cant_mount(dentry))) { - inode_unlock(dentry->d_inode); - goto out; - } - namespace_lock(); - if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { + if (unlikely(cant_mount(dentry) || !is_mounted(mnt))) + break; // not to be mounted on + + if (beneath && unlikely(m->mnt_mountpoint != dentry || + &m->mnt_parent->mnt != under.mnt)) { namespace_unlock(); inode_unlock(dentry->d_inode); - goto out; + continue; // got moved } mnt = lookup_mnt(path); - if (likely(!mnt)) + if (unlikely(mnt)) { + namespace_unlock(); + inode_unlock(dentry->d_inode); + path_put(path); + path->mnt = mnt; + path->dentry = dget(mnt->mnt_root); + continue; // got overmounted + } + mp = get_mountpoint(dentry); + if (IS_ERR(mp)) break; - - namespace_unlock(); - inode_unlock(dentry->d_inode); - if (beneath) - dput(dentry); - path_put(path); - path->mnt = mnt; - path->dentry = dget(mnt->mnt_root); - } - - mp = get_mountpoint(dentry); - if (IS_ERR(mp)) { - namespace_unlock(); - inode_unlock(dentry->d_inode); + if (beneath) { + /* 
+ * @under duplicates the references that will stay + * at least until namespace_unlock(), so the path_put() + * below is safe (and OK to do under namespace_lock - + * we are not dropping the final references here). + */ + path_put(&under); + } + return mp; } - -out: + namespace_unlock(); + inode_unlock(dentry->d_inode); if (beneath) - dput(dentry); - + path_put(&under); return mp; } @@ -2884,14 +2903,11 @@ static inline struct mountpoint *lock_mount(struct path *path) static void unlock_mount(struct mountpoint *where) { - struct dentry *dentry = where->m_dentry; - + inode_unlock(where->m_dentry->d_inode); read_seqlock_excl(&mount_lock); put_mountpoint(where); read_sequnlock_excl(&mount_lock); - namespace_unlock(); - inode_unlock(dentry->d_inode); } static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) @@ -2942,6 +2958,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + if (!check_mnt(mnt)) { + err = -EINVAL; + goto out_unlock; + } if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3033,7 +3053,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse) if (!may_copy_tree(old_path)) return mnt; - if (!recurse && has_locked_children(old, old_path->dentry)) + if (!recurse && __has_locked_children(old, old_path->dentry)) return mnt; if (recurse) @@ -3426,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) goto out; /* From mount should not have locked children in place of To's root */ - if (has_locked_children(from, to->mnt.mnt_root)) + if (__has_locked_children(from, to->mnt.mnt_root)) goto out; /* Setting sharing groups is only allowed on private mounts */ @@ -3440,7 +3460,7 @@ static int do_set_group(struct path *from_path, struct path *to_path) if (IS_MNT_SLAVE(from)) { struct mount *m = from->mnt_master; - list_add(&to->mnt_slave, &m->mnt_slave_list); + list_add(&to->mnt_slave, &from->mnt_slave); to->mnt_master = m; } 
@@ -3465,18 +3485,25 @@ static int do_set_group(struct path *from_path, struct path *to_path) * Check if path is overmounted, i.e., if there's a mount on top of * @path->mnt with @path->dentry as mountpoint. * - * Context: This function expects namespace_lock() to be held. + * Context: namespace_sem must be held at least shared. + * MUST NOT be called under lock_mount_hash() (there one should just + * call __lookup_mnt() and check if it returns NULL). * Return: If path is overmounted true is returned, false if not. */ static inline bool path_overmounted(const struct path *path) { + unsigned seq = read_seqbegin(&mount_lock); + bool no_child; + rcu_read_lock(); - if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { - rcu_read_unlock(); - return true; - } + no_child = !__lookup_mnt(path->mnt, path->dentry); rcu_read_unlock(); - return false; + if (need_seqretry(&mount_lock, seq)) { + read_seqlock_excl(&mount_lock); + no_child = !__lookup_mnt(path->mnt, path->dentry); + read_sequnlock_excl(&mount_lock); + } + return unlikely(!no_child); } /** @@ -3555,7 +3582,8 @@ static int can_move_mount_beneath(const struct path *from, * @mnt_from itself. This defeats the whole purpose of mounting * @mnt_from beneath @mnt_to. */ - if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) + if (check_mnt(mnt_from) && + propagation_would_overmount(parent_mnt_to, mnt_from, mp)) return -EINVAL; return 0; @@ -3634,46 +3662,41 @@ static int do_move_mount(struct path *old_path, ns = old->mnt_ns; err = -EINVAL; - if (!may_use_mount(p)) - goto out; - /* The thing moved must be mounted... */ if (!is_mounted(&old->mnt)) goto out; - /* ... and either ours or the root of anon namespace */ - if (!(attached ? check_mnt(old) : is_anon_ns(ns))) - goto out; - - if (is_anon_ns(ns)) { + if (check_mnt(old)) { + /* if the source is in our namespace... */ + /* ... it should be detachable from parent */ + if (!mnt_has_parent(old) || IS_MNT_LOCKED(old)) + goto out; + /* ... 
and the target should be in our namespace */ + if (!check_mnt(p)) + goto out; + } else { /* - * Ending up with two files referring to the root of the - * same anonymous mount namespace would cause an error - * as this would mean trying to move the same mount - * twice into the mount tree which would be rejected - * later. But be explicit about it right here. + * otherwise the source must be the root of some anon namespace. + * AV: check for mount being root of an anon namespace is worth + * an inlined predicate... */ - if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns)) + if (!is_anon_ns(ns) || mnt_has_parent(old)) goto out; - /* - * If this is an anonymous mount tree ensure that mount - * propagation can detect mounts that were just - * propagated to the target mount tree so we don't - * propagate onto them. + * Bail out early if the target is within the same namespace - + * subsequent checks would've rejected that, but they lose + * some corner cases if we check it early. */ - ns->mntns_flags |= MNTNS_PROPAGATING; - } else if (is_anon_ns(p->mnt_ns)) { + if (ns == p->mnt_ns) + goto out; /* - * Don't allow moving an attached mount tree to an - * anonymous mount tree. + * Target should be either in our namespace or in an acceptable + * anon namespace, sensu check_anonymous_mnt(). 
*/ - goto out; + if (!may_use_mount(p)) + goto out; } - if (old->mnt.mnt_flags & MNT_LOCKED) - goto out; - if (!path_mounted(old_path)) goto out; @@ -3713,9 +3736,6 @@ static int do_move_mount(struct path *old_path, if (err) goto out; - if (is_anon_ns(ns)) - ns->mntns_flags &= ~MNTNS_PROPAGATING; - /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); @@ -5189,8 +5209,8 @@ static void finish_mount_kattr(struct mount_kattr *kattr) mnt_idmap_put(kattr->mnt_idmap); } -static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, - struct mount_kattr *kattr) +static int wants_mount_setattr(struct mount_attr __user *uattr, size_t usize, + struct mount_kattr *kattr) { int ret; struct mount_attr attr; @@ -5213,9 +5233,13 @@ static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, if (attr.attr_set == 0 && attr.attr_clr == 0 && attr.propagation == 0) - return 0; + return 0; /* Tell caller to not bother. */ + + ret = build_mount_kattr(&attr, usize, kattr); + if (ret < 0) + return ret; - return build_mount_kattr(&attr, usize, kattr); + return 1; } SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, @@ -5247,8 +5271,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - err = copy_mount_setattr(uattr, usize, &kattr); - if (err) + err = wants_mount_setattr(uattr, usize, &kattr); + if (err <= 0) return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); @@ -5282,15 +5306,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - ret = copy_mount_setattr(uattr, usize, &kattr); - if (ret) + ret = wants_mount_setattr(uattr, usize, &kattr); + if (ret < 0) return ret; - ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; + if (ret) { + ret = do_mount_setattr(&file->f_path, &kattr); 
+ if (ret) + return ret; - finish_mount_kattr(&kattr); + finish_mount_kattr(&kattr); + } } fd = get_unused_fd_flags(flags & O_CLOEXEC); diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0d1b6d35ff3b80..fd4619275801be 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -262,9 +262,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (ret < 0) { subreq->error = ret; /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } @@ -297,8 +297,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) subreq->error = ret; trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } size -= slice; @@ -312,7 +312,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } /* Defer error return as we may need to wait for outstanding I/O. 
*/ @@ -365,12 +365,10 @@ void netfs_readahead(struct readahead_control *ractl) goto cleanup_free; netfs_read_to_pagecache(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_return); cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_failed); } EXPORT_SYMBOL(netfs_readahead); @@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) folio_mark_uptodate(folio); } folio_unlock(folio); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -530,11 +528,11 @@ int netfs_read_folio(struct file *file, struct folio *folio) netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? 
ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -689,7 +687,7 @@ int netfs_write_begin(struct netfs_inode *ctx, ret = netfs_wait_for_read(rreq); if (ret < 0) goto error; - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_private_2_killable(folio); @@ -701,7 +699,7 @@ int netfs_write_begin(struct netfs_inode *ctx, return 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); error: if (folio) { folio_unlock(folio); @@ -752,11 +750,11 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? 
ret : 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); error: _leave(" = %d", ret); return ret; diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b4826360a41112..dbb544e183d13d 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -386,7 +386,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, wbc_detach_inode(&wbc); if (ret2 == -EIOCBQUEUED) return ret2; - if (ret == 0) + if (ret == 0 && ret2 < 0) ret = ret2; } diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 5e3f0aeb51f31f..9902766195d7b2 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } } @@ -103,7 +103,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) rreq->netfs_ops->issue_read(subreq); if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - netfs_wait_for_pause(rreq); + netfs_wait_for_paused_read(rreq); if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && @@ -115,7 +115,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } return ret; @@ -144,7 +144,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) ret = netfs_dispatch_unbuffered_reads(rreq); if (!rreq->submitted) { - netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + netfs_put_request(rreq, netfs_rreq_trace_put_no_submit); inode_dio_end(rreq->inode); ret = 0; goto out; @@ -188,7 +188,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, iocb->ki_pos, orig_count, - NETFS_DIO_READ); + iocb->ki_flags & IOCB_DIRECT ? + NETFS_DIO_READ : NETFS_UNBUFFERED_READ); if (IS_ERR(rreq)) return PTR_ERR(rreq); @@ -236,7 +237,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i } out: - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); if (ret > 0) orig_count -= ret; return ret; diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 42ce53cc216e9d..fa9a5bf3c6d512 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); + if (async) + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); /* Copy the data into the bounce buffer and encrypt it. 
*/ // TODO @@ -105,19 +107,15 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (!async) { trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - ret = wreq->error; - if (ret == 0) { - ret = wreq->transferred; + ret = netfs_wait_for_write(wreq); + if (ret > 0) iocb->ki_pos += ret; - } } else { ret = -EIOCBQUEUED; } out: - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked); diff --git a/fs/netfs/fscache_cache.c b/fs/netfs/fscache_cache.c index 9397ed39b0b4ec..8f70f8da064b50 100644 --- a/fs/netfs/fscache_cache.c +++ b/fs/netfs/fscache_cache.c @@ -372,7 +372,7 @@ void fscache_withdraw_cache(struct fscache_cache *cache) EXPORT_SYMBOL(fscache_withdraw_cache); #ifdef CONFIG_PROC_FS -static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] = "-PAEW"; +static const char fscache_cache_states[NR__FSCACHE_CACHE_STATE] __nonstring = "-PAEW"; /* * Generate a list of caches in /proc/fs/fscache/caches diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index d4d4b3a8b10603..3d56fc73435ffb 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -29,7 +29,7 @@ static LIST_HEAD(fscache_cookie_lru); static DEFINE_SPINLOCK(fscache_cookie_lru_lock); DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out); static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker); -static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; +static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] __nonstring = "-LCAIFUWRD"; static unsigned int fscache_lru_cookie_timeout = 10 * HZ; void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index b1722a82c03d3d..e4308457633ca3 100644 --- 
a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits); /* * Deal with the completion of writing the data to the cache. */ -static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, - bool was_async) +static void fscache_wreq_done(void *priv, ssize_t transferred_or_error) { struct fscache_write_request *wreq = priv; @@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, wreq->set_bits); if (wreq->term_func) - wreq->term_func(wreq->term_func_priv, transferred_or_error, - was_async); + wreq->term_func(wreq->term_func_priv, transferred_or_error); fscache_end_operation(&wreq->cache_resources); kfree(wreq); } @@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, return; abandon_end: - return fscache_wreq_done(wreq, ret, false); + return fscache_wreq_done(wreq, ret); abandon_free: kfree(wreq); abandon: if (using_pgpriv2) fscache_clear_page_bits(mapping, start, len, cond); if (term_func) - term_func(term_func_priv, ret, false); + term_func(term_func_priv, ret); } EXPORT_SYMBOL(__fscache_write_to_cache); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 1c4f953c3d683b..e2ee9183392b93 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -23,7 +23,7 @@ /* * buffered_read.c */ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); @@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq, enum netfs_folioq_trace trace); void netfs_reset_iter(struct netfs_io_subrequest *subreq); +void netfs_wake_collector(struct netfs_io_request *rreq); +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest 
*subreq); +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream); +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq); +void netfs_wait_for_paused_read(struct netfs_io_request *rreq); +void netfs_wait_for_paused_write(struct netfs_io_request *rreq); /* * objects.c @@ -71,9 +79,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, loff_t start, size_t len, enum netfs_io_origin origin); void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async); -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what); +void netfs_clear_subrequests(struct netfs_io_request *rreq); +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq); static inline void netfs_see_request(struct netfs_io_request *rreq, @@ -92,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, /* * read_collect.c */ +bool netfs_read_collection(struct netfs_io_request *rreq); void netfs_read_collection_worker(struct work_struct *work); -void netfs_wake_read_collector(struct netfs_io_request *rreq); -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); -void netfs_wait_for_pause(struct netfs_io_request *rreq); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); /* * read_pgpriv2.c @@ -176,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat) * write_collect.c */ int netfs_folio_written_back(struct folio *folio); +bool netfs_write_collection(struct netfs_io_request *wreq); void netfs_write_collection_worker(struct work_struct *work); -void 
netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async); /* * write_issue.c @@ -198,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache); -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache); +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache); int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); /* @@ -254,6 +259,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) netfs_group->free(netfs_group); } +/* + * Clear and wake up a NETFS_RREQ_* flag bit on a request. + */ +static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, + unsigned int rreq_flag, + enum netfs_rreq_trace trace) +{ + if (test_bit(rreq_flag, &rreq->flags)) { + trace_netfs_rreq(rreq, trace); + clear_bit_unlock(rreq_flag, &rreq->flags); + smp_mb__after_atomic(); /* Set flag before task state */ + wake_up(&rreq->waitq); + } +} + /* * fscache-cache.c */ diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 4e3e6204083140..3db401d269e7b3 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = { [NETFS_READ_GAPS] = "RG", [NETFS_READ_SINGLE] = "R1", [NETFS_READ_FOR_WRITE] = "RW", + [NETFS_UNBUFFERED_READ] = "UR", [NETFS_DIO_READ] = "DR", [NETFS_WRITEBACK] = "WB", [NETFS_WRITEBACK_SINGLE] = "W1", @@ -127,11 +128,13 @@ static int __init netfs_init(void) if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0) goto error_subreqpool; +#ifdef CONFIG_PROC_FS if (!proc_mkdir("fs/netfs", NULL)) goto error_proc; if (!proc_create_seq("fs/netfs/requests", 
S_IFREG | 0444, NULL, &netfs_requests_seq_ops)) goto error_procfile; +#endif #ifdef CONFIG_FSCACHE_STATS if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, netfs_stats_show)) @@ -144,9 +147,11 @@ static int __init netfs_init(void) return 0; error_fscache: +#ifdef CONFIG_PROC_FS error_procfile: remove_proc_subtree("fs/netfs", NULL); error_proc: +#endif mempool_exit(&netfs_subrequest_pool); error_subreqpool: kmem_cache_destroy(netfs_subrequest_slab); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 7099aa07737ac0..43b67a28a8fa07 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -313,3 +313,222 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) return true; } EXPORT_SYMBOL(netfs_release_folio); + +/* + * Wake the collection work item. + */ +void netfs_wake_collector(struct netfs_io_request *rreq) +{ + if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && + !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { + queue_work(system_unbound_wq, &rreq->work); + } else { + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); + wake_up(&rreq->waitq); + } +} + +/* + * Mark a subrequest as no longer being in progress and, if need be, wake the + * collector. + */ +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *rreq = subreq->rreq; + struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr]; + + clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ + + /* If we are at the head of the queue, wake up the collector. */ + if (list_is_first(&subreq->rreq_link, &stream->subrequests) || + test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) + netfs_wake_collector(rreq); +} + +/* + * Wait for all outstanding I/O in a stream to quiesce. 
+ */ +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream) +{ + struct netfs_io_subrequest *subreq; + DEFINE_WAIT(myself); + + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + continue; + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + for (;;) { + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + break; + + trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + } + + finish_wait(&rreq->waitq, &myself); +} + +/* + * Perform collection in app thread if not offloaded to workqueue. + */ +static int netfs_collect_in_app(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + bool need_collect = false, inactive = true; + + for (int i = 0; i < NR_IO_STREAMS; i++) { + struct netfs_io_subrequest *subreq; + struct netfs_io_stream *stream = &rreq->io_streams[i]; + + if (!stream->active) + continue; + inactive = false; + trace_netfs_collect_stream(rreq, stream); + subreq = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, + rreq_link); + if (subreq && + (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { + need_collect = true; + break; + } + } + + if (!need_collect && !inactive) + return 0; /* Sleep */ + + __set_current_state(TASK_RUNNING); + if (collector(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + return 1; /* Done */ + } + + if (inactive) { + WARN(true, "Failed to collect inactive req R=%08x\n", + rreq->debug_id); + cond_resched(); + } + return 2; /* Again */ +} + +/* + * Wait for a request to complete, successfully or otherwise. 
+ */ +static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + ssize_t ret; + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); + + ret = rreq->error; + if (ret == 0) { + ret = rreq->transferred; + switch (rreq->origin) { + case NETFS_DIO_READ: + case NETFS_DIO_WRITE: + case NETFS_READ_SINGLE: + case NETFS_UNBUFFERED_READ: + case NETFS_UNBUFFERED_WRITE: + break; + default: + if (rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); + ret = -EIO; + } + break; + } + } + + return ret; +} + +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_read_collection); +} + +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_write_collection); +} + +/* + * Wait for a paused operation to unpause or complete in some manner. 
+ */ +static void netfs_wait_for_pause(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); +} + +void netfs_wait_for_paused_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_read_collection); +} + +void netfs_wait_for_paused_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_write_collection); +} diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index dc6b41ef18b097..31fa0c81e2a43e 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -10,6 +10,8 @@ #include #include "internal.h" +static void netfs_free_request(struct work_struct *work); + /* * Allocate an I/O request and initialise it. 
*/ @@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } memset(rreq, 0, kmem_cache_size(cache)); + INIT_WORK(&rreq->cleanup_work, netfs_free_request); rreq->start = start; rreq->len = len; rreq->origin = origin; @@ -49,13 +52,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); init_waitqueue_head(&rreq->waitq); - refcount_set(&rreq->ref, 1); + refcount_set(&rreq->ref, 2); if (origin == NETFS_READAHEAD || origin == NETFS_READPAGE || origin == NETFS_READ_GAPS || origin == NETFS_READ_SINGLE || origin == NETFS_READ_FOR_WRITE || + origin == NETFS_UNBUFFERED_READ || origin == NETFS_DIO_READ) { INIT_WORK(&rreq->work, netfs_read_collection_worker); rreq->io_streams[0].avail = true; @@ -63,7 +67,9 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_WORK(&rreq->work, netfs_write_collection_worker); } + /* The IN_PROGRESS flag comes with a ref. 
*/ __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { @@ -75,7 +81,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } atomic_inc(&ctx->io_count); - trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new); + trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new); netfs_proc_add_rreq(rreq); netfs_stat(&netfs_n_rh_rreq); return rreq; @@ -89,7 +95,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace trace_netfs_rreq_ref(rreq->debug_id, r + 1, what); } -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) +void netfs_clear_subrequests(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; @@ -101,8 +107,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) subreq = list_first_entry(&stream->subrequests, struct netfs_io_subrequest, rreq_link); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, was_async, - netfs_sreq_trace_put_clear); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear); } } } @@ -118,13 +123,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu) static void netfs_free_request(struct work_struct *work) { struct netfs_io_request *rreq = - container_of(work, struct netfs_io_request, work); + container_of(work, struct netfs_io_request, cleanup_work); struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned int i; trace_netfs_rreq(rreq, netfs_rreq_trace_free); + + /* Cancel/flush the result collection worker. That does not carry a + * ref of its own, so we must wait for it somewhere. 
+ */ + cancel_work_sync(&rreq->work); + netfs_proc_del_rreq(rreq); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); if (rreq->netfs_ops->free_request) rreq->netfs_ops->free_request(rreq); if (rreq->cache_resources.ops) @@ -145,8 +156,7 @@ static void netfs_free_request(struct work_struct *work) call_rcu(&rreq->rcu, netfs_free_request_rcu); } -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what) +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what) { unsigned int debug_id; bool dead; @@ -156,15 +166,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async, debug_id = rreq->debug_id; dead = __refcount_dec_and_test(&rreq->ref, &r); trace_netfs_rreq_ref(debug_id, r - 1, what); - if (dead) { - if (was_async) { - rreq->work.func = netfs_free_request; - if (!queue_work(system_unbound_wq, &rreq->work)) - WARN_ON(1); - } else { - netfs_free_request(&rreq->work); - } - } + if (dead) + WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work)); } } @@ -206,8 +209,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq, what); } -static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, - bool was_async) +static void netfs_free_subrequest(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; @@ -216,10 +218,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, rreq->netfs_ops->free_subrequest(subreq); mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); netfs_stat_d(&netfs_n_rh_sreq); - netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); + netfs_put_request(rreq, netfs_rreq_trace_put_subreq); } -void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, +void netfs_put_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what) { unsigned int debug_index = subreq->debug_index; @@ -230,5 +232,5 @@ void 
netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, dead = __refcount_dec_and_test(&subreq->ref, &r); trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what); if (dead) - netfs_free_subrequest(subreq, was_async); + netfs_free_subrequest(subreq); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 23c75755ad4ed9..bad677e58a4237 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -280,9 +280,13 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) stream->need_retry = true; notes |= NEED_RETRY | MADE_PROGRESS; break; + } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { + notes |= MADE_PROGRESS; } else { if (!stream->failed) - stream->transferred = stream->collected_to - rreq->start; + stream->transferred += transferred; + if (front->transferred < front->len) + set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; } @@ -297,7 +301,7 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&rreq->lock); - netfs_put_subrequest(remove, false, + netfs_put_subrequest(remove, notes & ABANDON_SREQ ? 
netfs_sreq_trace_put_abandon : netfs_sreq_trace_put_done); @@ -311,14 +315,8 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) { - trace_netfs_rreq(rreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&rreq->waitq); - } - if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess; } @@ -342,24 +340,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq) */ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; unsigned int i; - /* Collect unbuffered reads and direct reads, adding up the transfer - * sizes until we find the first short or failed subrequest. 
- */ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - rreq->transferred += subreq->transferred; - - if (subreq->transferred < subreq->len || - test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { - rreq->error = subreq->error; - break; - } - } - - if (rreq->origin == NETFS_DIO_READ) { + if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) { for (i = 0; i < rreq->direct_bv_count; i++) { flush_dcache_page(rreq->direct_bv[i].bv_page); // TODO: cifs marks pages in the destination buffer @@ -377,7 +361,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); - if (rreq->origin == NETFS_DIO_READ) + if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) inode_dio_end(rreq->inode); } @@ -410,7 +395,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) * Note that we're in normal kernel thread context at this point, possibly * running on a workqueue. */ -static void netfs_read_collection(struct netfs_io_request *rreq) +bool netfs_read_collection(struct netfs_io_request *rreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -420,11 +405,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq) * queue is empty. */ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) - return; + return false; smp_rmb(); /* Read ALL_QUEUED before subreq lists. */ if (!list_empty(&stream->subrequests)) - return; + return false; /* Okay, declare that all I/O is complete. 
*/ rreq->transferred = stream->transferred; @@ -433,6 +418,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq) //netfs_rreq_is_still_valid(rreq); switch (rreq->origin) { + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: case NETFS_READ_GAPS: netfs_rreq_assess_dio(rreq); @@ -445,14 +431,15 @@ static void netfs_read_collection(struct netfs_io_request *rreq) } task_io_account_read(rreq->transferred); - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ trace_netfs_rreq(rreq, netfs_rreq_trace_done); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); netfs_unlock_abandoned_read_pages(rreq); if (unlikely(rreq->copy_to_cache)) netfs_pgpriv2_end_copy_to_cache(rreq); + return true; } void netfs_read_collection_worker(struct work_struct *work) @@ -460,26 +447,12 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - netfs_read_collection(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_work); -} - -/* - * Wake the collection work item. 
- */ -void netfs_wake_read_collector(struct netfs_io_request *rreq) -{ - if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && - !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { - if (!work_pending(&rreq->work)) { - netfs_get_request(rreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &rreq->work)) - netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq); - } - } else { - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); - wake_up(&rreq->waitq); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_read_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); } } @@ -511,7 +484,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq) list_is_first(&subreq->rreq_link, &stream->subrequests) ) { __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } } EXPORT_SYMBOL(netfs_read_subreq_progress); @@ -535,7 +508,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress); void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; switch (subreq->source) { case NETFS_READ_FROM_CACHE: @@ -582,23 +554,15 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ - - /* If we are at the head of the queue, wake up the collector. 
*/ - if (list_is_first(&subreq->rreq_link, &stream->subrequests) || - test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) - netfs_wake_read_collector(rreq); - - netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); + netfs_subreq_clear_in_progress(subreq); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_read_subreq_terminated); /* * Handle termination of a read from the cache. */ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async) +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = priv; @@ -613,94 +577,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool } netfs_read_subreq_terminated(subreq); } - -/* - * Wait for the read operation to complete, successfully or otherwise. - */ -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - ssize_t ret; - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); - continue; - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - - ret = rreq->error; - if (ret == 0) { - ret = rreq->transferred; - switch (rreq->origin) { - case NETFS_DIO_READ: - case NETFS_READ_SINGLE: - ret = rreq->transferred; - break; - default: - if (rreq->submitted < rreq->len) { - trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); - ret = -EIO; 
- } - break; - } - } - - return ret; -} - -/* - * Wait for a paused read operation to unpause or complete in some manner. - */ -void netfs_wait_for_pause(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); - continue; - } - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || - !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); -} diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c index cf7727060215ad..5bbe906a551d57 100644 --- a/fs/netfs/read_pgpriv2.c +++ b/fs/netfs/read_pgpriv2.c @@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( return creq; cancel_put: - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); cancel: rreq->copy_to_cache = ERR_PTR(-ENOBUFS); clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); @@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq) smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags); - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); creq->copy_to_cache = NULL; } diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 0f294b26e08c96..b99e84a8170af2 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } @@ -257,35 +257,15 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) */ void netfs_retry_reads(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); netfs_stat(&netfs_n_rh_retry_read_req); - set_bit(NETFS_RREQ_RETRYING, &rreq->flags); - /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. 
*/ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - continue; - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - for (;;) { - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - break; - - trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - } + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); + netfs_wait_for_in_progress_stream(rreq, stream); clear_bit(NETFS_RREQ_RETRYING, &rreq->flags); trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index fea0ecdecc5397..fa622a6cd56da3 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); return ret; cancel: - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); return ret; } @@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite netfs_single_dispatch_read(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret; cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); return ret; } EXPORT_SYMBOL(netfs_read_single); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 3fca59e6475d1c..0ce7b53e7fe83f 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -280,7 +280,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&wreq->lock); 
- netfs_put_subrequest(remove, false, + netfs_put_subrequest(remove, notes & SAW_FAILURE ? netfs_sreq_trace_put_cancel : netfs_sreq_trace_put_done); @@ -321,18 +321,14 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&wreq->waitq); - } - if (notes & NEED_REASSESS) { + if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess_streams; } - if (notes & MADE_PROGRESS) { + + if (notes & NEED_REASSESS) { //cond_resched(); goto reassess_streams; } @@ -356,30 +352,21 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq) /* * Perform the collection of subrequests, folios and encryption buffers. */ -void netfs_write_collection_worker(struct work_struct *work) +bool netfs_write_collection(struct netfs_io_request *wreq) { - struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; int s; _enter("R=%x", wreq->debug_id); - netfs_see_request(wreq, netfs_rreq_trace_see_work); - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } - netfs_collect_write_results(wreq); /* We're done when the app thread has finished posting subreqs and all * the queues in all the streams are empty. */ - if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) + return false; smp_rmb(); /* Read ALL_QUEUED before lists. 
*/ transferred = LONG_MAX; @@ -387,10 +374,8 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_stream *stream = &wreq->io_streams[s]; if (!stream->active) continue; - if (!list_empty(&stream->subrequests)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!list_empty(&stream->subrequests)) + return false; if (stream->transferred < transferred) transferred = stream->transferred; } @@ -428,8 +413,8 @@ void netfs_write_collection_worker(struct work_struct *work) inode_dio_end(wreq->inode); _debug("finished"); - trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags); + netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); @@ -440,19 +425,21 @@ void netfs_write_collection_worker(struct work_struct *work) wreq->iocb = VFS_PTR_POISON; } - netfs_clear_subrequests(wreq, false); - netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); + netfs_clear_subrequests(wreq); + return true; } -/* - * Wake the collection work item. - */ -void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) +void netfs_write_collection_worker(struct work_struct *work) { - if (!work_pending(&wreq->work)) { - netfs_get_request(wreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &wreq->work)) - netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); + struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); + + netfs_see_request(rreq, netfs_rreq_trace_see_work); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_write_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. 
*/ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); } } @@ -460,7 +447,6 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. * @transferred_or_error: The amount of data transferred or an error code. - * @was_async: The termination was asynchronous * * This tells the library that a contributory write I/O operation has * terminated, one way or another, and that it should collect the results. @@ -470,21 +456,16 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) * negative error code. The library will look after reissuing I/O operations * as appropriate and writing downloaded data to the cache. * - * If @was_async is true, the caller might be running in softirq or interrupt - * context and we can't sleep. - * * When this is called, ownership of the subrequest is transferred back to the * library, along with a ref. * * Note that %_op is a void* so that the function can be passed to * kiocb::term_func without the need for a casting wrapper. */ -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async) +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; - struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); @@ -536,15 +517,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - /* If we are at the head of the queue, wake up the collector, - * transferring a ref to it if we were the ones to do so. 
- */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) - netfs_wake_write_collector(wreq, was_async); - - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); + netfs_subreq_clear_in_progress(subreq); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 77279fc5b5a7cb..50bee2c4130d1e 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, return wreq; nomem: wreq->error = -ENOMEM; - netfs_put_request(wreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(wreq, netfs_rreq_trace_put_failed); return ERR_PTR(-ENOMEM); } @@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) - return netfs_write_subrequest_terminated(subreq, subreq->error, false); + return netfs_write_subrequest_terminated(subreq, subreq->error); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); stream->issue_write(subreq); @@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq) } if (needs_poke) - netfs_wake_write_collector(wreq, false); + netfs_wake_collector(wreq); } /* @@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping, goto couldnt_start; } + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); trace_netfs_write(wreq, netfs_write_trace_writeback); netfs_stat(&netfs_n_wh_writepages); @@ -599,8 +600,9 @@ int netfs_writepages(struct address_space *mapping, netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); + netfs_wake_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", error); return error; @@ -673,11 +675,11 @@ int 
netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c /* * End a write operation used when writing through the pagecache. */ -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache) +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache) { struct netfs_inode *ictx = netfs_inode(wreq->inode); - int ret; + ssize_t ret; _enter("R=%x", wreq->debug_id); @@ -688,13 +690,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr mutex_unlock(&ictx->wb_lock); - if (wreq->iocb) { + if (wreq->iocb) ret = -EIOCBQUEUED; - } else { - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); - ret = wreq->error; - } - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + else + ret = netfs_wait_for_write(wreq); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } @@ -722,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t start += part; len -= part; rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); - wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags)); - } + if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) + netfs_wait_for_paused_write(wreq); if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) break; } @@ -885,7 +883,8 @@ int netfs_writeback_single(struct address_space *mapping, goto couldnt_start; } - trace_netfs_write(wreq, netfs_write_trace_writeback); + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); + trace_netfs_write(wreq, netfs_write_trace_writeback_single); netfs_stat(&netfs_n_wh_writepages); if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) @@ -914,8 +913,9 @@ int netfs_writeback_single(struct address_space *mapping, set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 
mutex_unlock(&ictx->wb_lock); + netfs_wake_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", ret); return ret; diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 545d33079a77d0..9d1d8a8bab7261 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) break; if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { - struct iov_iter source = subreq->io_iter; + struct iov_iter source; - iov_iter_revert(&source, subreq->len - source.count); + netfs_reset_iter(subreq); + source = subreq->io_iter; netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_write(stream, subreq, &source); } @@ -131,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_discard); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } @@ -199,7 +200,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, */ void netfs_retry_writes(struct netfs_io_request *wreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; int s; @@ -208,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq) /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. 
*/ + set_bit(NETFS_RREQ_RETRYING, &wreq->flags); for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; - if (!stream->active) - continue; - - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - } + if (stream->active) + netfs_wait_for_in_progress_stream(wreq, stream); } + clear_bit(NETFS_RREQ_RETRYING, &wreq->flags); // TODO: Enc: Fetch changed partial pages // TODO: Enc: Reencrypt content if needed. diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index d3f76101ad4b91..07932ce9246c17 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -2,6 +2,7 @@ config NFS_FS tristate "NFS client support" depends on INET && FILE_LOCKING && MULTIUSER + select CRC32 select LOCKD select SUNRPC select NFS_COMMON @@ -196,7 +197,6 @@ config NFS_USE_KERNEL_DNS config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG - select CRC32 default y config NFS_DISABLE_UDP_SUPPORT diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 02c916a550205f..6d63b958c4bb13 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1105,6 +1105,8 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) server->namelen = NFS2_MAXNAMLEN; } + /* Linux 'subtree_check' borkenness mandates this setting */ + server->fh_expire_type = NFS_FH_VOL_RENAME; if (!(fattr->valid & NFS_ATTR_FATTR)) { error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, @@ -1199,6 +1201,10 @@ void nfs_clients_init(struct net *net) INIT_LIST_HEAD(&nn->nfs_volume_list); #if IS_ENABLED(CONFIG_NFS_V4) idr_init(&nn->cb_ident_idr); +#endif +#if IS_ENABLED(CONFIG_NFS_V4_1) + INIT_LIST_HEAD(&nn->nfs4_data_server_cache); + spin_lock_init(&nn->nfs4_data_server_lock); #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); @@ -1216,6 +1222,9 @@ void nfs_clients_exit(struct net *net) nfs_cleanup_cb_ident_idr(net); 
WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); +#if IS_ENABLED(CONFIG_NFS_V4_1) + WARN_ON_ONCE(!list_empty(&nn->nfs4_data_server_cache)); +#endif } #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd23fc736b3947..d0e0b435a84316 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2676,6 +2676,18 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) unblock_revalidate(new_dentry); } +static bool nfs_rename_is_unsafe_cross_dir(struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct nfs_server *server = NFS_SB(old_dentry->d_sb); + + if (old_dentry->d_parent != new_dentry->d_parent) + return false; + if (server->fh_expire_type & NFS_FH_RENAME_UNSAFE) + return !(server->fh_expire_type & NFS_FH_NOEXPIRE_WITH_OPEN); + return true; +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2763,7 +2775,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } - if (S_ISREG(old_inode->i_mode)) + if (S_ISREG(old_inode->i_mode) && + nfs_rename_is_unsafe_cross_dir(old_dentry, new_dentry)) nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? 
nfs_unblock_rename : NULL); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f32f8d7c9122bf..48d89716193a7e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -757,7 +757,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; @@ -786,6 +785,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&inode->i_lock); while (!list_empty(&hdr->pages)) { + struct nfs_page *req; req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4fa304fa5bc4b2..29d9234d5c085f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -76,6 +76,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct page *scratch; struct list_head dsaddrs; struct nfs4_pnfs_ds_addr *da; + struct net *net = server->nfs_client->cl_net; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -159,8 +160,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -170,7 +170,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err_free_deviceid; } - dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!dsaddr->ds_list[i]) goto out_err_drain_dsaddrs; trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c 
b/fs/nfs/flexfilelayout/flexfilelayout.c index 61ad269c825ff0..e6909cafab6864 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1329,7 +1329,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_READ, task->tk_status); - trace_ff_layout_read_error(hdr); + trace_ff_layout_read_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1502,7 +1502,7 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->args.offset, hdr->args.count, &hdr->res.op_status, OP_WRITE, task->tk_status); - trace_ff_layout_write_error(hdr); + trace_ff_layout_write_error(hdr, task->tk_status); } err = ff_layout_async_handle_error(task, hdr->args.context->state, @@ -1551,7 +1551,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, data->args.offset, data->args.count, &data->res.op_status, OP_COMMIT, task->tk_status); - trace_ff_layout_commit_error(data); + trace_ff_layout_commit_error(data, task->tk_status); } err = ff_layout_async_handle_error(task, NULL, data->ds_clp, diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e58bedfb1dcc14..4a304cf17c4b07 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -49,6 +49,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, struct nfs4_pnfs_ds_addr *da; struct nfs4_ff_layout_ds *new_ds = NULL; struct nfs4_ff_ds_version *ds_versions = NULL; + struct net *net = server->nfs_client->cl_net; u32 mp_count; u32 version_count; __be32 *p; @@ -80,8 +81,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, for (i = 0; i < mp_count; i++) { /* multipath ds */ - da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags); if (da) 
list_add_tail(&da->da_node, &dsaddrs); } @@ -149,7 +149,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, new_ds->ds_versions = ds_versions; new_ds->ds_versions_cnt = version_count; - new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags); if (!new_ds->ds) goto out_err_drain_dsaddrs; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index e278a1ad1ca3e8..8b07851787312b 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) sreq = netfs->sreq; if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_UNBUFFERED_READ && sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ec8d32d0e2e95f..6655e5f32ec63c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -899,18 +899,11 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } -#ifdef CONFIG_CRC32 static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } -#else -static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) -{ - return 0; -} -#endif static inline bool nfs_current_task_exiting(void) { diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 5c21caeae075c7..e6d36b3d3fc059 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -207,14 +207,16 @@ void nfs_local_probe_async(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs_local_probe_async); -static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf) +static inline void nfs_local_file_put(struct nfsd_file *localio) { - return nfs_to->nfsd_file_get(nf); -} + /* nfs_to_nfsd_file_put_local() expects an __rcu pointer + * but we have a __kernel pointer. 
It is always safe + * to cast a __kernel pointer to an __rcu pointer + * because the cast only weakens what is known about the pointer. + */ + struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio; -static inline void nfs_local_file_put(struct nfsd_file *nf) -{ - nfs_to->nfsd_file_put(nf); + nfs_to_nfsd_file_put_local(&nf); } /* @@ -226,12 +228,13 @@ static inline void nfs_local_file_put(struct nfsd_file *nf) static struct nfsd_file * __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, struct nfs_file_localio *nfl, + struct nfsd_file __rcu **pnf, const fmode_t mode) { struct nfsd_file *localio; localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, - cred, fh, nfl, mode); + cred, fh, nfl, pnf, mode); if (IS_ERR(localio)) { int status = PTR_ERR(localio); trace_nfs_local_open_fh(fh, mode, status); @@ -258,7 +261,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, struct nfs_file_localio *nfl, const fmode_t mode) { - struct nfsd_file *nf, *new, __rcu **pnf; + struct nfsd_file *nf, __rcu **pnf; if (!nfs_server_is_local(clp)) return NULL; @@ -270,29 +273,9 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, else pnf = &nfl->ro_file; - new = NULL; - rcu_read_lock(); - nf = rcu_dereference(*pnf); - if (!nf) { - rcu_read_unlock(); - new = __nfs_local_open_fh(clp, cred, fh, nfl, mode); - if (IS_ERR(new)) - return NULL; - /* try to swap in the pointer */ - spin_lock(&clp->cl_uuid.lock); - nf = rcu_dereference_protected(*pnf, 1); - if (!nf) { - nf = new; - new = NULL; - rcu_assign_pointer(*pnf, nf); - } - spin_unlock(&clp->cl_uuid.lock); - rcu_read_lock(); - } - nf = nfs_local_file_get(nf); - rcu_read_unlock(); - if (new) - nfs_to_nfsd_file_put_local(new); + nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode); + if (IS_ERR(nf)) + return NULL; return nf; } EXPORT_SYMBOL_GPL(nfs_local_open_fh); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 
a68b21603ea9a8..6ba3ea39e928c0 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -31,7 +31,11 @@ struct nfs_net { unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; int cb_users[NFS4_MAX_MINOR_VERSION + 1]; -#endif +#endif /* CONFIG_NFS_V4 */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + struct list_head nfs4_data_server_cache; + spinlock_t nfs4_data_server_lock; +#endif /* CONFIG_NFS_V4_1 */ struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 18d8f6529f615e..a126eb31f62fae 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -104,7 +104,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu) switch (status) { case 0: - status = nfs_refresh_inode(inode, res.fattr); + nfs_refresh_inode(inode, res.fattr); break; case -EPFNOSUPPORT: case -EPROTONOSUPPORT: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 970f28dbf2539e..4b123bca65e12d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -671,6 +671,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, struct nfs_client *clp = server->nfs_client; int ret; + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + exception->delay = 0; + exception->recovering = 0; + exception->retry = 0; + return -EIO; + } + ret = nfs4_do_handle_exception(server, errorcode, exception); if (exception->delay) { int ret2 = nfs4_exception_should_retrans(server, exception); @@ -5155,13 +5164,15 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ } static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, - struct nfs4_createdata *data) + struct nfs4_createdata *data, int *statusp) { - int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, + struct dentry *ret; + + *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, 
NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); - if (status) - return ERR_PTR(status); + if (*statusp) + return NULL; spin_lock(&dir->i_lock); /* Creating a directory bumps nlink in the parent */ @@ -5170,7 +5181,11 @@ static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, data->res.fattr->time_start, NFS_INO_INVALID_DATA); spin_unlock(&dir->i_lock); - return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + if (!IS_ERR(ret)) + return ret; + *statusp = PTR_ERR(ret); + return NULL; } static void nfs4_free_createdata(struct nfs4_createdata *data) @@ -5231,17 +5246,18 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, int *statusp) { struct nfs4_createdata *data; - struct dentry *ret = ERR_PTR(-ENOMEM); + struct dentry *ret = NULL; + *statusp = -ENOMEM; data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); if (data == NULL) goto out; data->arg.label = label; - ret = nfs4_do_mkdir(dir, dentry, data); + ret = nfs4_do_mkdir(dir, dentry, data, statusp); nfs4_free_createdata(data); out: @@ -5264,11 +5280,12 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) sattr->ia_mode &= ~current_umask(); do { - alias = _nfs4_proc_mkdir(dir, dentry, sattr, label); - err = PTR_ERR_OR_ZERO(alias); + alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err); trace_nfs4_mkdir(dir, &dentry->d_name, err); - err = nfs4_handle_exception(NFS_SERVER(dir), err, - &exception); + if (err) + alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + err, + &exception)); } while (exception.retry); nfs4_label_release_security(label); @@ -7074,10 +7091,18 @@ static struct nfs4_unlockdata 
*nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct nfs4_state *state = lsp->ls_state; struct inode *inode = state->inode; + struct nfs_lock_context *l_ctx; p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; + l_ctx = nfs_get_lock_context(ctx); + if (!IS_ERR(l_ctx)) { + p->l_ctx = l_ctx; + } else { + kfree(p); + return NULL; + } p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; @@ -7085,7 +7110,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->lsp = lsp; /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - p->l_ctx = nfs_get_lock_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 351616c61df541..f9c291e2165cd8 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -148,16 +148,12 @@ static inline void nfs4_copy_sessionid(struct nfs4_sessionid *dst, memcpy(dst->data, src->data, NFS4_MAX_SESSIONID_LEN); } -#ifdef CONFIG_CRC32 /* * nfs_session_id_hash - calculate the crc32 hash for the session id * @session - pointer to session */ #define nfs_session_id_hash(sess_id) \ (~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data))) -#else -#define nfs_session_id_hash(session) (0) -#endif #else /* defined(CONFIG_NFS_V4_1) */ static inline int nfs4_init_session(struct nfs_client *clp) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index bc67fe6801b138..deab4c0e21a064 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2051,13 +2051,15 @@ TRACE_EVENT(fl_getdevinfo, DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( - const struct nfs_pgio_header *hdr + const struct nfs_pgio_header *hdr, + int error ), - TP_ARGS(hdr), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ 
-2073,7 +2075,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_fast_assign( const struct inode *inode = hdr->inode; - __entry->error = hdr->res.op_status; + __entry->error = -error; + __entry->nfs_error = hdr->res.op_status; __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = inode->i_sb->s_dev; @@ -2088,7 +2091,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s " + "nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -2096,28 +2100,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, __entry->fhandle, __entry->offset, __entry->count, __entry->stateid_seq, __entry->stateid_hash, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); #define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ TP_PROTO( \ - const struct nfs_pgio_header *hdr \ + const struct nfs_pgio_header *hdr, \ + int error \ ), \ - TP_ARGS(hdr)) + TP_ARGS(hdr, error)) DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); TRACE_EVENT(ff_layout_commit_error, TP_PROTO( - const struct nfs_commit_data *data + const struct nfs_commit_data *data, + int error ), - TP_ARGS(data), + TP_ARGS(data, error), TP_STRUCT__entry( __field(unsigned long, error) + __field(unsigned long, nfs_error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -2131,7 +2139,8 @@ TRACE_EVENT(ff_layout_commit_error, TP_fast_assign( const struct inode *inode = data->inode; - __entry->error = data->res.op_status; + __entry->error = -error; + __entry->nfs_error = data->res.op_status; __entry->fhandle = nfs_fhandle_hash(data->args.fh); __entry->fileid = NFS_FILEID(inode); __entry->dev = 
inode->i_sb->s_dev; @@ -2142,14 +2151,15 @@ TRACE_EVENT(ff_layout_commit_error, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%llu count=%u dstaddr=%s", + "offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)", -__entry->error, show_nfs4_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, __entry->count, - __get_str(dstaddr) + __get_str(dstaddr), __entry->nfs_error, + show_nfs4_status(__entry->nfs_error) ) ); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5f582713bf05eb..3adb7d0dbec7ac 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -745,6 +745,14 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void pnfs_reset_return_info(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); +} + static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, struct list_head *free_me, @@ -1246,21 +1254,15 @@ static void pnfs_clear_layoutcommit(struct inode *inode, static void pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, - const struct pnfs_layout_range *range) + const struct pnfs_layout_range *range, + struct list_head *freeme) { - const struct pnfs_layout_segment *lseg; - u32 seq = be32_to_cpu(arg_stateid->seqid); - if (pnfs_layout_is_valid(lo) && - nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) { - list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) { - if (pnfs_seqid_is_newer(lseg->pls_seq, seq) || - !pnfs_should_free_range(&lseg->pls_range, range)) - continue; - pnfs_set_plh_return_info(lo, range->iomode, seq); - break; - } - } + nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + pnfs_reset_return_info(lo); + else + pnfs_mark_layout_stateid_invalid(lo, freeme); + pnfs_clear_layoutreturn_waitbit(lo); } void 
pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, @@ -1268,11 +1270,12 @@ void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo, const struct pnfs_layout_range *range) { struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); spin_lock(&inode->i_lock); - pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range); - pnfs_clear_layoutreturn_waitbit(lo); + pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range, &freeme); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); } void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, @@ -1292,6 +1295,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); pnfs_free_returned_lsegs(lo, &freeme, range, seq); pnfs_set_layout_stateid(lo, stateid, NULL, true); + pnfs_reset_return_info(lo); } else pnfs_mark_layout_stateid_invalid(lo, &freeme); out_unlock: @@ -1661,6 +1665,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, /* Was there an RPC level error? If not, retry */ if (task->tk_rpc_status == 0) break; + /* + * Is there a fatal network level error? + * If so release the layout, but flag the error. 
+ */ + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + *ret = 0; + (*respp)->lrs_present = 0; + retval = -EIO; + break; + } /* If the call was not sent, let caller handle it */ if (!RPC_WAS_SENT(task)) return 0; @@ -1695,6 +1711,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct inode *inode = args->inode; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; + LIST_HEAD(freeme); switch (ret) { case -NFS4ERR_BADSESSION: @@ -1703,9 +1720,9 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, case -NFS4ERR_NOMATCHING_LAYOUT: spin_lock(&inode->i_lock); pnfs_layoutreturn_retry_later_locked(lo, &args->stateid, - &args->range); - pnfs_clear_layoutreturn_waitbit(lo); + &args->range, &freeme); spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); break; case 0: if (res->lrs_present) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 30d2613e912b88..91ff877185c8af 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -60,6 +60,7 @@ struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; + const struct net *ds_net; struct nfs_client *ds_clp; refcount_t ds_count; unsigned long ds_state; @@ -415,7 +416,8 @@ int pnfs_generic_commit_pagelist(struct inode *inode, int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); -struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, +struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(const struct net *net, + struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 
dbef837e871ad4..91ef486f40b943 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -16,6 +16,7 @@ #include "nfs4session.h" #include "internal.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -504,14 +505,14 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); /* * Data server cache * - * Data servers can be mapped to different device ids. - * nfs4_pnfs_ds reference counting + * Data servers can be mapped to different device ids, but should + * never be shared between net namespaces. + * + * nfs4_pnfs_ds reference counting: * - set to 1 on allocation * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); -static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ static void @@ -604,11 +605,11 @@ _same_data_server_addrs_locked(const struct list_head *dsaddrs1, * Lookup DS by addresses. nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) +_data_server_lookup_locked(const struct nfs_net *nn, const struct list_head *dsaddrs) { struct nfs4_pnfs_ds *ds; - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + list_for_each_entry(ds, &nn->nfs4_data_server_cache, ds_node) if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) return ds; return NULL; @@ -653,10 +654,11 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (refcount_dec_and_lock(&ds->ds_count, - &nfs4_ds_cache_lock)) { + struct nfs_net *nn = net_generic(ds->ds_net, nfs_net_id); + + if (refcount_dec_and_lock(&ds->ds_count, &nn->nfs4_data_server_lock)) { list_del_init(&ds->ds_node); - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); destroy_ds(ds); } } @@ -716,8 +718,9 @@ nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) * uncached and return cached struct nfs4_pnfs_ds. 
*/ struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags) { + struct nfs_net *nn = net_generic(net, nfs_net_id); struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; char *remotestr; @@ -733,16 +736,17 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) /* this is only used for debugging, so it's ok if its NULL */ remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); - spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); + spin_lock(&nn->nfs4_data_server_lock); + tmp_ds = _data_server_lookup_locked(nn, dsaddrs); if (tmp_ds == NULL) { INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); + ds->ds_net = net; ds->ds_clp = NULL; - list_add(&ds->ds_node, &nfs4_data_server_cache); + list_add(&ds->ds_node, &nn->nfs4_data_server_cache); dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { @@ -754,7 +758,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } - spin_unlock(&nfs4_ds_cache_lock); + spin_unlock(&nn->nfs4_data_server_lock); out: return ds; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9eea9e62afc9c3..91b5503b6f74d7 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1051,6 +1051,16 @@ int nfs_reconfigure(struct fs_context *fc) sync_filesystem(sb); + /* + * The SB_RDONLY flag has been removed from the superblock during + * mounts to prevent interference between different filesystems. + * Similarly, it is also necessary to ignore the SB_RDONLY flag + * during reconfiguration; otherwise, it may also result in the + * creation of redundant superblocks when mounting a directory with + * different rw and ro flags multiple times. 
+ */ + fc->sb_flags_mask &= ~SB_RDONLY; + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which @@ -1308,8 +1318,17 @@ int nfs_get_tree_common(struct fs_context *fc) if (IS_ERR(server)) return PTR_ERR(server); + /* + * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a + * superblock among each filesystem that mounts sub-directories + * belonging to a single exported root path. + * To prevent interference between different filesystems, the + * SB_RDONLY flag should be removed from the superblock. + */ if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; + else + fc->sb_flags &= ~SB_RDONLY; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 6a0bdea6d6449f..05c7c16e37ab4c 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -151,8 +151,7 @@ EXPORT_SYMBOL_GPL(nfs_localio_enable_client); */ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) { - LIST_HEAD(local_files); - struct nfs_file_localio *nfl, *tmp; + struct nfs_file_localio *nfl; spin_lock(&nfs_uuid->lock); if (unlikely(!rcu_access_pointer(nfs_uuid->net))) { @@ -166,17 +165,42 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) nfs_uuid->dom = NULL; } - list_splice_init(&nfs_uuid->files, &local_files); - spin_unlock(&nfs_uuid->lock); - /* Walk list of files and ensure their last references dropped */ - list_for_each_entry_safe(nfl, tmp, &local_files, list) { - nfs_close_local_fh(nfl); + + while ((nfl = list_first_entry_or_null(&nfs_uuid->files, + struct nfs_file_localio, + list)) != NULL) { + /* If nfs_uuid is already NULL, nfs_close_local_fh is + * closing and we must wait, else we unlink and close. + */ + if (rcu_access_pointer(nfl->nfs_uuid) == NULL) { + /* nfs_close_local_fh() is doing the + * close and we must wait. 
until it unlinks + */ + wait_var_event_spinlock(nfl, + list_first_entry_or_null( + &nfs_uuid->files, + struct nfs_file_localio, + list) != nfl, + &nfs_uuid->lock); + continue; + } + + /* Remove nfl from nfs_uuid->files list */ + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); + + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); cond_resched(); - } - spin_lock(&nfs_uuid->lock); - BUG_ON(!list_empty(&nfs_uuid->files)); + spin_lock(&nfs_uuid->lock); + /* Now we can allow racing nfs_close_local_fh() to + * skip the locking. + */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + } /* Remove client from nn->local_clients */ if (nfs_uuid->list_lock) { @@ -237,6 +261,7 @@ static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, struct rpc_clnt *rpc_clnt, const struct cred *cred, const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl, + struct nfsd_file __rcu **pnf, const fmode_t fmode) { struct net *net; @@ -261,10 +286,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, rcu_read_unlock(); /* We have an implied reference to net thanks to nfsd_net_try_get */ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, - cred, nfs_fh, fmode); - if (IS_ERR(localio)) - nfs_to_nfsd_net_put(net); - else + cred, nfs_fh, pnf, fmode); + nfs_to_nfsd_net_put(net); + if (!IS_ERR(localio)) nfs_uuid_add_file(uuid, nfl); return localio; @@ -273,8 +297,6 @@ EXPORT_SYMBOL_GPL(nfs_open_local_fh); void nfs_close_local_fh(struct nfs_file_localio *nfl) { - struct nfsd_file *ro_nf = NULL; - struct nfsd_file *rw_nf = NULL; nfs_uuid_t *nfs_uuid; rcu_read_lock(); @@ -285,28 +307,39 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl) return; } - ro_nf = rcu_access_pointer(nfl->ro_file); - rw_nf = rcu_access_pointer(nfl->rw_file); - if (ro_nf || rw_nf) { - spin_lock(&nfs_uuid->lock); - if (ro_nf) - ro_nf = 
rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1); - if (rw_nf) - rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1); - - /* Remove nfl from nfs_uuid->files list */ - RCU_INIT_POINTER(nfl->nfs_uuid, NULL); - list_del_init(&nfl->list); + spin_lock(&nfs_uuid->lock); + if (!rcu_access_pointer(nfl->nfs_uuid)) { + /* nfs_uuid_put has finished here */ spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); - - if (ro_nf) - nfs_to_nfsd_file_put_local(ro_nf); - if (rw_nf) - nfs_to_nfsd_file_put_local(rw_nf); return; } + if (list_empty(&nfs_uuid->files)) { + /* nfs_uuid_put() has started closing files, wait for it + * to finished + */ + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + wait_var_event(&nfl->nfs_uuid, + rcu_access_pointer(nfl->nfs_uuid) == NULL); + return; + } + /* tell nfs_uuid_put() to wait for us */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + spin_unlock(&nfs_uuid->lock); rcu_read_unlock(); + + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); + + /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put() + * that we are done. The moment we drop the spinlock the + * nfs_uuid could be freed. + */ + spin_lock(&nfs_uuid->lock); + list_del_init(&nfl->list); + wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock); + spin_unlock(&nfs_uuid->lock); } EXPORT_SYMBOL_GPL(nfs_close_local_fh); diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 792d3fed1b45fd..731a88f6313ebf 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,6 +4,7 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ab85e6a2454f4c..e108b6c705b459 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -378,14 +378,40 @@ nfsd_file_put(struct nfsd_file *nf) * the reference of the nfsd_file. 
*/ struct net * -nfsd_file_put_local(struct nfsd_file *nf) +nfsd_file_put_local(struct nfsd_file __rcu **pnf) { - struct net *net = nf->nf_net; + struct nfsd_file *nf; + struct net *net = NULL; - nfsd_file_put(nf); + nf = unrcu_pointer(xchg(pnf, NULL)); + if (nf) { + net = nf->nf_net; + nfsd_file_put(nf); + } return net; } +/** + * nfsd_file_get_local - get nfsd_file reference and reference to net + * @nf: nfsd_file of which to put the reference + * + * Get reference to both the nfsd_file and nf->nf_net. + */ +struct nfsd_file * +nfsd_file_get_local(struct nfsd_file *nf) +{ + struct net *net = nf->nf_net; + + if (nfsd_net_try_get(net)) { + nf = nfsd_file_get(nf); + if (!nf) + nfsd_net_put(net); + } else { + nf = NULL; + } + return nf; +} + /** * nfsd_file_file - get the backing file of an nfsd_file * @nf: nfsd_file of which to access the backing file. diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 5865f9c7271214..722b26c71e454a 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -62,7 +62,8 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -struct net *nfsd_file_put_local(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); +struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c index 238647fa379e32..80d9ff6608a7b9 100644 --- a/fs/nfsd/localio.c +++ b/fs/nfsd/localio.c @@ -24,21 +24,6 @@ #include "filecache.h" #include "cache.h" -static const struct nfsd_localio_operations nfsd_localio_ops = { - .nfsd_net_try_get = nfsd_net_try_get, - .nfsd_net_put = nfsd_net_put, - .nfsd_open_local_fh = nfsd_open_local_fh, - .nfsd_file_put_local = nfsd_file_put_local, - 
.nfsd_file_get = nfsd_file_get, - .nfsd_file_put = nfsd_file_put, - .nfsd_file_file = nfsd_file_file, -}; - -void nfsd_localio_ops_init(void) -{ - nfs_to = &nfsd_localio_ops; -} - /** * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file * @@ -47,6 +32,7 @@ void nfsd_localio_ops_init(void) * @rpc_clnt: rpc_clnt that the client established * @cred: cred that the client established * @nfs_fh: filehandle to lookup + * @nfp: place to find the nfsd_file, or store it if it was non-NULL * @fmode: fmode_t to use for open * * This function maps a local fh to a path on a local filesystem. @@ -57,10 +43,11 @@ void nfsd_localio_ops_init(void) * set. Caller (NFS client) is responsible for calling nfsd_net_put and * nfsd_file_put (via nfs_to_nfsd_file_put_local). */ -struct nfsd_file * +static struct nfsd_file * nfsd_open_local_fh(struct net *net, struct auth_domain *dom, struct rpc_clnt *rpc_clnt, const struct cred *cred, - const struct nfs_fh *nfs_fh, const fmode_t fmode) + const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf, + const fmode_t fmode) { int mayflags = NFSD_MAY_LOCALIO; struct svc_cred rq_cred; @@ -71,6 +58,15 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (nfs_fh->size > NFS4_FHSIZE) return ERR_PTR(-EINVAL); + if (!nfsd_net_try_get(net)) + return ERR_PTR(-ENXIO); + + rcu_read_lock(); + localio = nfsd_file_get(rcu_dereference(*pnf)); + rcu_read_unlock(); + if (localio) + return localio; + /* nfs_fh -> svc_fh */ fh_init(&fh, NFS4_FHSIZE); fh.fh_handle.fh_size = nfs_fh->size; @@ -92,9 +88,47 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom, if (rq_cred.cr_group_info) put_group_info(rq_cred.cr_group_info); + if (!IS_ERR(localio)) { + struct nfsd_file *new; + if (!nfsd_net_try_get(net)) { + nfsd_file_put(localio); + nfsd_net_put(net); + return ERR_PTR(-ENXIO); + } + nfsd_file_get(localio); + again: + new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio))); + if (new) { + /* Some other thread 
installed an nfsd_file */ + if (nfsd_file_get(new) == NULL) + goto again; + /* + * Drop the ref we were going to install and the + * one we were going to return. + */ + nfsd_file_put(localio); + nfsd_file_put(localio); + localio = new; + } + } else + nfsd_net_put(net); + return localio; } -EXPORT_SYMBOL_GPL(nfsd_open_local_fh); + +static const struct nfsd_localio_operations nfsd_localio_ops = { + .nfsd_net_try_get = nfsd_net_try_get, + .nfsd_net_put = nfsd_net_put, + .nfsd_open_local_fh = nfsd_open_local_fh, + .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_get_local = nfsd_file_get_local, + .nfsd_file_file = nfsd_file_file, +}; + +void nfsd_localio_ops_init(void) +{ + nfs_to = &nfsd_localio_ops; +} /* * UUID_IS_LOCAL XDR functions diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2041268b398a4e..59a693f22452b8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5430,7 +5430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) queued = nfsd4_run_cb(&dp->dl_recall); WARN_ON_ONCE(!queued); if (!queued) - nfs4_put_stid(&dp->dl_stid); + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. 
*/ diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 876152a91f122f..5103c2f4d2253a 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 0d8f7fb15c2e54..dd0c8e560ef6a2 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc2b..2d8dc6b35b5477 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index cb01ea81724dca..d0bcf744c553a0 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) int blocksize; int err; - down_write(&nilfs->ns_sem); 
- blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index f2d840ae4ded88..87f861e9004f2d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1961,12 +1961,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; if (mark_cmd == FAN_MARK_FLUSH) { - if (mark_type == FAN_MARK_MOUNT) - fsnotify_clear_vfsmount_marks_by_group(group); - else if (mark_type == FAN_MARK_FILESYSTEM) - fsnotify_clear_sb_marks_by_group(group); - else - fsnotify_clear_inode_marks_by_group(group); + fsnotify_clear_marks_by_group(group, obj_type); return 0; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 78d20e4baa2c9a..1bf2a6593dec66 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -2182,6 +2182,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx, e = hdr_first_de(&n->index->ihdr); fnd_push(fnd, n, e); + if (!e) { + err = -EINVAL; + goto out; + } if (!de_is_last(e)) { /* @@ -2203,6 +2207,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx, n = fnd->nodes[level]; te = hdr_first_de(&n->index->ihdr); + if (!te) { + err = -EINVAL; + goto out; + } /* Copy the candidate entry into the replacement entry buffer. */ re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS); if (!re) { diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 3e2957a1e3605c..0f0d27d4644a9b 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -805,6 +805,10 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ret = 0; goto out; } + if (is_compressed(ni)) { + ret = 0; + goto out; + } ret = blockdev_direct_IO(iocb, inode, iter, wr ? 
ntfs_get_block_direct_IO_W : @@ -2068,5 +2072,6 @@ const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, .dirty_folio = block_dirty_folio, + .direct_IO = ntfs_direct_IO, }; // clang-format on diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index b8ac85b548c7e5..821cb7874685e1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end, if (IS_ERR(folios[numfolios])) { ret = PTR_ERR(folios[numfolios]); mlog_errno(ret); + folios[numfolios] = NULL; goto out; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f1b4b3e611cb9b..e5f58ff2175f41 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } -void ocfs2_recovery_exit(struct ocfs2_super *osb) +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) { - struct ocfs2_recovery_map *rm; - - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + /* + * If recovery thread is not running, we can directly transition to + * final state. 
+ */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); +} + +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1249,7 +1271,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. 
*/ if (!bh) @@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1569,27 +1603,29 @@ static int __ocfs2_recovery_thread(void *arg) ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? 
- -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0e7..6397170f302f22 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51c52768132d70..6aaa94c554c12a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. 
+ */ + OCFS2_REC_QUOTA_DISABLED, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +385,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2956d888c13145..de7f12858729ac 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. 
*/ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -677,8 +674,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, break; } out: - up_read(&sb->s_umount); - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... 
*/ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7b483f0de2add..6ac4dcd54588cf 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. 
*/ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. 
*/ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b481b834857d33..bcf2ed4a86310b 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8bb5022f30824b..3d2533950bae20 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5ac743c6bc2ed5..08a6f372a352f8 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -32,12 +32,13 @@ static int orangefs_writepage_locked(struct folio *folio, len = i_size_read(inode); if (folio->private) { wr = folio->private; - WARN_ON(wr->pos >= len); off = wr->pos; - if (off + wr->len > len) + if ((off + wr->len > len) && (off <= len)) wlen = len - off; else wlen = wr->len; + if (wlen == 0) + wlen = wr->len; } 
else { WARN_ON(1); off = folio_pos(folio); @@ -46,8 +47,6 @@ static int orangefs_writepage_locked(struct folio *folio, if (wlen > len - off) wlen = len - off; } - /* Should've been handled in orangefs_invalidate_folio. */ - WARN_ON(off == len || off + wlen > len); WARN_ON(wlen == 0); bvec_set_folio(&bv, folio, wlen, offset_in_folio(folio, off)); @@ -320,6 +319,8 @@ static int orangefs_write_begin(struct file *file, wr->len += len; goto okay; } else { + wr->pos = pos; + wr->len = len; ret = orangefs_launder_folio(folio); if (ret) return ret; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6f2f8f4cfbbc17..aef942a758cea5 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -541,8 +541,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); int ovl_ensure_verity_loaded(struct path *path); -int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, - u8 *digest_buf, int *buf_length); int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath, struct path *datapath); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b63474d1b06476..e19940d649ca3a 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1138,6 +1138,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, return ERR_PTR(-EINVAL); } + if (ctx->nr == ctx->nr_data) { + pr_err("at least one non-data lowerdir is required\n"); + return ERR_PTR(-EINVAL); + } + err = -EINVAL; for (i = 0; i < ctx->nr; i++) { l = &ctx->lower[i]; diff --git a/fs/pidfs.c b/fs/pidfs.c index d64a4cbeb0dafa..87a53d2ae4bb78 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -336,7 +336,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && 
kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: @@ -888,6 +888,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) return ERR_PTR(-ESRCH); flags &= ~PIDFD_CLONE; + flags |= O_RDWR; pidfd_file = dentry_open(&path, flags, current_cred()); /* Raise PIDFD_THREAD explicitly as do_dentry_open() strips it. */ if (!IS_ERR(pidfd_file)) diff --git a/fs/pnode.c b/fs/pnode.c index 7a062a5de10e36..ffd429b760d5d4 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -150,7 +150,7 @@ static struct mount *propagation_next(struct mount *m, struct mount *origin) { /* are there any slaves of this mount? */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); while (1) { @@ -174,7 +174,7 @@ static struct mount *skip_propagation_subtree(struct mount *m, * Advance m such that propagation_next will not return * the slaves of m. */ - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) m = last_slave(m); return m; @@ -185,7 +185,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin) while (1) { while (1) { struct mount *next; - if (!IS_MNT_PROPAGATED(m) && !list_empty(&m->mnt_slave_list)) + if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) return first_slave(m); next = next_peer(m); if (m->mnt_group_id == origin->mnt_group_id) { @@ -226,11 +226,15 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp) struct mount *child; int type; /* skip ones added by this propagate_mnt() */ - if (IS_MNT_PROPAGATED(m)) + if (IS_MNT_NEW(m)) return 0; - /* skip if mountpoint isn't covered by it */ + /* skip if mountpoint isn't visible in m */ if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) return 0; + /* skip if m is in the anon_ns */ + if (is_anon_ns(m->mnt_ns)) + return 0; + if (peers(m, last_dest)) { type = CL_MAKE_SHARED; } else { @@ -380,9 +384,6 @@ bool 
propagation_would_overmount(const struct mount *from, if (!IS_MNT_SHARED(from)) return false; - if (IS_MNT_PROPAGATED(to)) - return false; - if (to->mnt.mnt_root != mp->m_dentry) return false; diff --git a/fs/pnode.h b/fs/pnode.h index ddafe0d087ca0a..34b6247af01d92 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_PROPAGATED(m) (!(m)->mnt_ns || ((m)->mnt_ns->mntns_flags & MNTNS_PROPAGATING)) +#define IS_MNT_NEW(m) (!(m)->mnt_ns) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) diff --git a/fs/select.c b/fs/select.c index 7da531b1cf6bec..0eaf3522abe9a2 100644 --- a/fs/select.c +++ b/fs/select.c @@ -857,7 +857,7 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait, int fd = pollfd->fd; __poll_t mask, filter; - if (fd < 0) + if (unlikely(fd < 0)) return 0; CLASS(fd, f)(fd); diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index fe738623cf1ba9..240d82c6f90806 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -29,7 +29,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, { struct cached_fid *cfid; - spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { if (!strcmp(cfid->path, path)) { /* @@ -38,25 +37,20 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * being deleted due to a lease break. 
*/ if (!cfid->time || !cfid->has_lease) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } kref_get(&cfid->refcount); - spin_unlock(&cfids->cfid_list_lock); return cfid; } } if (lookup_only) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } if (cfids->num_entries >= max_cached_dirs) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid = init_cached_dir(path); if (cfid == NULL) { - spin_unlock(&cfids->cfid_list_lock); return NULL; } cfid->cfids = cfids; @@ -74,7 +68,6 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, */ cfid->has_lease = true; - spin_unlock(&cfids->cfid_list_lock); return cfid; } @@ -187,8 +180,10 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, if (!utf16_path) return -ENOMEM; + spin_lock(&cfids->cfid_list_lock); cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs); if (cfid == NULL) { + spin_unlock(&cfids->cfid_list_lock); kfree(utf16_path); return -ENOENT; } @@ -197,7 +192,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. 
*/ - spin_lock(&cfids->cfid_list_lock); if (cfid->has_lease && cfid->time) { spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index e69968e88fe724..35892df7335c75 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -704,18 +704,12 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server) cifs_free_hash(&server->secmech.md5); cifs_free_hash(&server->secmech.sha512); - if (!SERVER_IS_CHAN(server)) { - if (server->secmech.enc) { - crypto_free_aead(server->secmech.enc); - server->secmech.enc = NULL; - } - - if (server->secmech.dec) { - crypto_free_aead(server->secmech.dec); - server->secmech.dec = NULL; - } - } else { + if (server->secmech.enc) { + crypto_free_aead(server->secmech.enc); server->secmech.enc = NULL; + } + if (server->secmech.dec) { + crypto_free_aead(server->secmech.dec); server->secmech.dec = NULL; } } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 07c4688ec4c99e..3b32116b0b4964 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -625,10 +625,8 @@ struct smb_version_operations { bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); - int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data); + struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov, + u32 *plen); int (*create_reparse_symlink)(const unsigned int xid, struct inode *inode, struct dentry *dentry, diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index 48d0d6f439cf45..1b79fe07476f65 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -1266,10 +1266,9 @@ typedef struct smb_com_query_information_rsp { typedef struct smb_com_setattr_req { struct smb_hdr hdr; /* wct = 8 */ __le16 attr; - __le16 time_low; 
- __le16 time_high; + __le32 last_write_time; __le16 reserved[5]; /* must be zero */ - __u16 ByteCount; + __le16 ByteCount; __u8 BufferFormat; /* 4 = ASCII */ unsigned char fileName[]; } __attribute__((packed)) SETATTR_REQ; @@ -2256,6 +2255,8 @@ typedef struct { #define FILE_SUPPORTS_ENCRYPTION 0x00020000 #define FILE_SUPPORTS_OBJECT_IDS 0x00010000 #define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400 +#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200 #define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 #define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 #define FILE_SUPPORTS_SPARSE_FILES 0x00000040 diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index cfcc07905bdf1b..66093fa78aed7d 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof, bool from_readdir); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async); +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, @@ -163,6 +162,8 @@ extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, struct cifsFileInfo **ret_file); +extern int cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file); extern unsigned int smbCalcSize(void *buf); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); @@ -393,6 +394,10 @@ extern int CIFSSMBQFSUnixInfo(const unsigned int 
xid, struct cifs_tcon *tcon); extern int CIFSSMBQFSPosixInfo(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *FSData); +extern int SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb); extern int CIFSSMBSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e511..a3ba3346ed313f 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1725,7 +1725,7 @@ cifs_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - cifs_write_subrequest_terminated(wdata, result, true); + cifs_write_subrequest_terminated(wdata, result); release_mid(mid); trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0, server->credits, server->in_flight, @@ -1813,7 +1813,7 @@ cifs_async_writev(struct cifs_io_subrequest *wdata) out: if (rc) { add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); } } @@ -2753,10 +2753,10 @@ int cifs_query_reparse_point(const unsigned int xid, io_req->TotalParameterCount = 0; io_req->TotalDataCount = 0; - io_req->MaxParameterCount = cpu_to_le32(2); + io_req->MaxParameterCount = cpu_to_le32(0); /* BB find exact data count max from sess structure BB */ io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00); - io_req->MaxSetupCount = 4; + io_req->MaxSetupCount = 1; io_req->Reserved = 0; io_req->ParameterOffset = 0; io_req->DataCount = 0; @@ -2783,6 +2783,22 @@ int cifs_query_reparse_point(const unsigned int xid, goto error; } + /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. 
*/ + if (io_rsp->SetupCount != 1) { + rc = -EIO; + goto error; + } + + /* + * ReturnedDataLen is output length of executed IOCTL. + * DataCount is output length transferred over network. + * Check that we have full FSCTL_GET_REPARSE_POINT buffer. + */ + if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) { + rc = -EIO; + goto error; + } + end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount; start = (__u8 *)&io_rsp->hdr.Protocol + data_offset; if (start >= end) { @@ -5171,6 +5187,63 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +int +SMBSetInformation(const unsigned int xid, struct cifs_tcon *tcon, + const char *fileName, __le32 attributes, __le64 write_time, + const struct nls_table *nls_codepage, + struct cifs_sb_info *cifs_sb) +{ + SETATTR_REQ *pSMB; + SETATTR_RSP *pSMBr; + struct timespec64 ts; + int bytes_returned; + int name_len; + int rc; + + cifs_dbg(FYI, "In %s path %s\n", __func__, fileName); + +retry: + rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; + + if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { + name_len = + cifsConvertToUTF16((__le16 *) pSMB->fileName, + fileName, PATH_MAX, nls_codepage, + cifs_remap(cifs_sb)); + name_len++; /* trailing null */ + name_len *= 2; + } else { + name_len = copy_path_name(pSMB->fileName, fileName); + } + /* Only few attributes can be set by this command, others are not accepted by Win9x. */ + pSMB->attr = cpu_to_le16(le32_to_cpu(attributes) & + (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM | ATTR_ARCHIVE)); + /* Zero write time value (in both NT and SETATTR formats) means to not change it. 
*/ + if (le64_to_cpu(write_time) != 0) { + ts = cifs_NTtimeToUnix(write_time); + pSMB->last_write_time = cpu_to_le32(ts.tv_sec); + } + pSMB->BufferFormat = 0x04; + name_len++; /* account for buffer type byte */ + inc_rfc1001_len(pSMB, (__u16)name_len); + pSMB->ByteCount = cpu_to_le16(name_len); + + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned, 0); + if (rc) + cifs_dbg(FYI, "Send error in %s = %d\n", __func__, rc); + + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto retry; + + return rc; +} + /* Some legacy servers such as NT4 require that the file times be set on an open handle, rather than by pathname - this is awkward due to potential access conflicts on the open, but it is unavoidable for these diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index f298e86a3c1fdb..6bf04d9a549138 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -300,7 +300,6 @@ cifs_abort_connection(struct TCP_Server_Info *server) server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; - put_net(cifs_net_ns(server)); } server->sequence_number = 0; server->session_estab = false; @@ -1074,13 +1073,9 @@ clean_demultiplex_info(struct TCP_Server_Info *server) msleep(125); if (cifs_rdma_enabled(server)) smbd_destroy(server); - if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; - - /* Release netns reference for the socket. */ - put_net(cifs_net_ns(server)); } if (!list_empty(&server->pending_mid_q)) { @@ -1128,7 +1123,6 @@ clean_demultiplex_info(struct TCP_Server_Info *server) */ } - /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); kfree(server->hostname); @@ -1774,8 +1768,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; - - /* Grab netns reference for this server. 
*/ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->sign = ctx->sign; @@ -1903,7 +1895,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, out_err_crypto_release: cifs_crypto_secmech_release(tcp_ses); - /* Release netns reference for this server. */ put_net(cifs_net_ns(tcp_ses)); out_err: @@ -1912,10 +1903,8 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); kfree(tcp_ses->leaf_fullpath); - if (tcp_ses->ssocket) { + if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); - put_net(cifs_net_ns(tcp_ses)); - } kfree(tcp_ses); } return ERR_PTR(rc); @@ -2556,6 +2545,8 @@ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) return 0; if (tcon->nodelete != ctx->nodelete) return 0; + if (tcon->posix_extensions != ctx->linux_ext) + return 0; return 1; } @@ -3357,24 +3348,20 @@ generic_ip_connect(struct TCP_Server_Info *server) socket = server->ssocket; } else { struct net *net = cifs_net_ns(server); + struct sock *sk; - rc = sock_create_kern(net, sfamily, SOCK_STREAM, IPPROTO_TCP, &server->ssocket); + rc = __sock_create(net, sfamily, SOCK_STREAM, + IPPROTO_TCP, &server->ssocket, 1); if (rc < 0) { cifs_server_dbg(VFS, "Error %d creating socket\n", rc); return rc; } - /* - * Grab netns reference for the socket. - * - * This reference will be released in several situations: - * - In the failure path before the cifsd thread is started. - * - In the all place where server->socket is released, it is - * also set to NULL. - * - Ultimately in clean_demultiplex_info(), during the final - * teardown. - */ - get_net(net); + sk = server->ssocket->sk; + __netns_tracker_free(net, &sk->ns_tracker, false); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); /* BB other socket options to set KEEPALIVE, NODELAY? 
*/ cifs_dbg(FYI, "Socket created\n"); @@ -3426,7 +3413,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); trace_smb3_connect_err(server->hostname, server->conn_id, &server->dstaddr, rc); - put_net(cifs_net_ns(server)); sock_release(socket); server->ssocket = NULL; return rc; @@ -3767,28 +3753,7 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) } } - /* - * Clamp the rsize/wsize mount arguments if they are too big for the server - * and set the rsize/wsize to the negotiated values if not passed in by - * the user on mount - */ - if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { - cifs_sb->ctx->wsize = - round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); - /* - * in the very unlikely event that the server sent a max write size under PAGE_SIZE, - * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 - */ - if (cifs_sb->ctx->wsize == 0) { - cifs_sb->ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); - } - } - if ((cifs_sb->ctx->rsize == 0) || - (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) - cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); - + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); /* * The cookie is initialized from volume info returned above. 
* Inside cifs_fscache_get_super_cookie it checks diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 8407fb1086643c..9835672267d277 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -130,7 +130,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq) else trace_netfs_sreq(subreq, netfs_sreq_trace_fail); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); goto out; } @@ -160,10 +160,10 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tlink_tcon(req->cfile->tlink), - cifs_sb->ctx); + if (cifs_sb->ctx->rsize == 0) { + cifs_negotiate_rsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); @@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq) goto failed; } - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); @@ -998,15 +999,23 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; 
- } else { - _cifsFileInfo_put(cfile, true, false); } + _cifsFileInfo_put(cfile, true, false); + } else { + /* hard link on the defeered close file */ + rc = cifs_get_hardlink_path(tcon, inode, file); + if (rc) + cifs_close_deferred_file(CIFS_I(inode)); } if (server->oplocks) @@ -2071,6 +2080,29 @@ cifs_move_llist(struct list_head *source, struct list_head *dest) list_move(li, dest); } +int +cifs_get_hardlink_path(struct cifs_tcon *tcon, struct inode *inode, + struct file *file) +{ + struct cifsFileInfo *open_file = NULL; + struct cifsInodeInfo *cinode = CIFS_I(inode); + int rc = 0; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cinode->open_file_lock); + + list_for_each_entry(open_file, &cinode->openFileList, flist) { + if (file->f_flags == open_file->f_flags) { + rc = -EINVAL; + break; + } + } + + spin_unlock(&cinode->open_file_lock); + spin_unlock(&tcon->open_file_lock); + return rc; +} + void cifs_free_llist(struct list_head *llist) { @@ -2395,8 +2427,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) return rc; } -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async) +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result) { struct netfs_io_request *wreq = wdata->rreq; struct netfs_inode *ictx = netfs_inode(wreq->inode); @@ -2413,7 +2444,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t netfs_resize_file(ictx, wrend, true); } - netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); + netfs_write_subrequest_terminated(&wdata->subreq, result); } struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 2980941b96679c..a634a34d4086a0 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1021,6 +1021,7 @@ static int smb3_reconfigure(struct fs_context *fc) struct dentry *root = fc->root; struct 
cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); struct cifs_ses *ses = cifs_sb_master_tcon(cifs_sb)->ses; + unsigned int rsize = ctx->rsize, wsize = ctx->wsize; char *new_password = NULL, *new_password2 = NULL; bool need_recon = false; int rc; @@ -1103,11 +1104,8 @@ static int smb3_reconfigure(struct fs_context *fc) STEAL_STRING(cifs_sb, ctx, iocharset); /* if rsize or wsize not passed in on remount, use previous values */ - if (ctx->rsize == 0) - ctx->rsize = cifs_sb->ctx->rsize; - if (ctx->wsize == 0) - ctx->wsize = cifs_sb->ctx->wsize; - + ctx->rsize = rsize ? CIFS_ALIGN_RSIZE(fc, rsize) : cifs_sb->ctx->rsize; + ctx->wsize = wsize ? CIFS_ALIGN_WSIZE(fc, wsize) : cifs_sb->ctx->wsize; smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); @@ -1312,7 +1310,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, __func__); goto cifs_parse_mount_err; } - ctx->bsize = result.uint_32; + ctx->bsize = CIFS_ALIGN_BSIZE(fc, result.uint_32); ctx->got_bsize = true; break; case Opt_rasize: @@ -1336,24 +1334,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->rasize = result.uint_32; break; case Opt_rsize: - ctx->rsize = result.uint_32; + ctx->rsize = CIFS_ALIGN_RSIZE(fc, result.uint_32); ctx->got_rsize = true; ctx->vol_rsize = ctx->rsize; break; case Opt_wsize: - ctx->wsize = result.uint_32; + ctx->wsize = CIFS_ALIGN_WSIZE(fc, result.uint_32); ctx->got_wsize = true; - if (ctx->wsize % PAGE_SIZE != 0) { - ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); - if (ctx->wsize == 0) { - ctx->wsize = PAGE_SIZE; - cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); - } else { - cifs_dbg(VFS, - "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", - ctx->wsize, PAGE_SIZE); - } - } ctx->vol_wsize = ctx->wsize; break; case Opt_acregmax: diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index d1d29249bcdb97..9e83302ce4b801 100644 --- a/fs/smb/client/fs_context.h +++ 
b/fs/smb/client/fs_context.h @@ -20,6 +20,21 @@ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \ } while (0) +static inline size_t cifs_io_align(struct fs_context *fc, + const char *name, size_t size) +{ + if (!size || !IS_ALIGNED(size, PAGE_SIZE)) { + cifs_errorf(fc, "unaligned %s, making it a multiple of %lu bytes\n", + name, PAGE_SIZE); + size = umax(round_down(size, PAGE_SIZE), PAGE_SIZE); + } + return size; +} + +#define CIFS_ALIGN_WSIZE(_fc, _size) cifs_io_align(_fc, "wsize", _size) +#define CIFS_ALIGN_RSIZE(_fc, _size) cifs_io_align(_fc, "rsize", _size) +#define CIFS_ALIGN_BSIZE(_fc, _size) cifs_io_align(_fc, "bsize", _size) + enum smb_version { Smb_1 = 1, Smb_20, @@ -361,4 +376,36 @@ static inline void cifs_mount_unlock(void) mutex_unlock(&cifs_mount_mutex); } +static inline void cifs_negotiate_rsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_rsize(tcon, ctx), PAGE_SIZE); + if (ctx->rsize) + size = umax(umin(ctx->rsize, size), PAGE_SIZE); + ctx->rsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_wsize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + unsigned int size; + + size = umax(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + if (ctx->wsize) + size = umax(umin(ctx->wsize, size), PAGE_SIZE); + ctx->wsize = round_down(size, PAGE_SIZE); +} + +static inline void cifs_negotiate_iosize(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + struct cifs_tcon *tcon) +{ + cifs_negotiate_rsize(server, ctx, tcon); + cifs_negotiate_wsize(server, ctx, tcon); +} + #endif diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index a00a9d91d0da3c..75be4b46bc6f18 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1203,18 +1203,17 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, goto out; } break; - case IO_REPARSE_TAG_MOUNT_POINT: - 
cifs_create_junction_fattr(fattr, sb); - rc = 0; - goto out; default: /* Check for cached reparse point data */ if (data->symlink_target || data->reparse.buf) { rc = 0; - } else if (iov && server->ops->parse_reparse_point) { - rc = server->ops->parse_reparse_point(cifs_sb, - full_path, - iov, data); + } else if (iov && server->ops->get_reparse_point_buffer) { + struct reparse_data_buffer *reparse_buf; + u32 reparse_len; + + reparse_buf = server->ops->get_reparse_point_buffer(iov, &reparse_len); + rc = parse_reparse_point(reparse_buf, reparse_len, + cifs_sb, full_path, data); /* * If the reparse point was not handled but it is the * name surrogate which points to directory, then treat @@ -1228,6 +1227,16 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, cifs_create_junction_fattr(fattr, sb); goto out; } + /* + * If the reparse point is unsupported by the Linux SMB + * client then let it process by the SMB server. So mask + * the -EOPNOTSUPP error code. This will allow Linux SMB + * client to send SMB OPEN request to server. If server + * does not support this reparse point too then server + * will return error during open the path. + */ + if (rc == -EOPNOTSUPP) + rc = 0; } if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 50f96259d9adc2..787d6bcb5d1dc4 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -733,7 +733,10 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, else cifs_buf_release(cfile->srch_inf. 
ntwrk_buf_start); + /* Reset all pointers to the network buffer to prevent stale references */ cfile->srch_inf.ntwrk_buf_start = NULL; + cfile->srch_inf.srch_entries_start = NULL; + cfile->srch_inf.last_entry = NULL; } rc = initiate_cifs_search(xid, file, full_path); if (rc) { @@ -756,11 +759,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, rc = server->ops->query_dir_next(xid, tcon, &cfile->fid, search_flags, &cfile->srch_inf); + if (rc) + return -ENOENT; /* FindFirst/Next set last_entry to NULL on malformed reply */ if (cfile->srch_inf.last_entry) cifs_save_resume_key(cfile->srch_inf.last_entry, cfile); - if (rc) - return -ENOENT; } if (index_to_find < cfile->srch_inf.index_of_last_entry) { /* we found the buffer that contains the entry */ diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 2b9e9885dc4258..bb25e77c5540c2 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -542,12 +542,12 @@ static int wsl_set_reparse_buf(struct reparse_data_buffer **buf, kfree(symname_utf16); return -ENOMEM; } - /* Flag 0x02000000 is unknown, but all wsl symlinks have this value */ - symlink_buf->Flags = cpu_to_le32(0x02000000); - /* PathBuffer is in UTF-8 but without trailing null-term byte */ + /* Version field must be set to 2 (MS-FSCC 2.1.2.7) */ + symlink_buf->Version = cpu_to_le32(2); + /* Target for Version 2 is in UTF-8 but without trailing null-term byte */ symname_utf8_len = utf16s_to_utf8s((wchar_t *)symname_utf16, symname_utf16_len/2, UTF16_LITTLE_ENDIAN, - symlink_buf->PathBuffer, + symlink_buf->Target, symname_utf8_maxlen); *buf = (struct reparse_data_buffer *)symlink_buf; buf_len = sizeof(struct reparse_wsl_symlink_data_buffer) + symname_utf8_len; @@ -1016,29 +1016,36 @@ static int parse_reparse_wsl_symlink(struct reparse_wsl_symlink_data_buffer *buf struct cifs_open_info_data *data) { int len = le16_to_cpu(buf->ReparseDataLength); + int data_offset = offsetof(typeof(*buf), Target) - 
offsetof(typeof(*buf), Version); int symname_utf8_len; __le16 *symname_utf16; int symname_utf16_len; - if (len <= sizeof(buf->Flags)) { + if (len <= data_offset) { cifs_dbg(VFS, "srv returned malformed wsl symlink buffer\n"); return -EIO; } - /* PathBuffer is in UTF-8 but without trailing null-term byte */ - symname_utf8_len = len - sizeof(buf->Flags); + /* MS-FSCC 2.1.2.7 defines layout of the Target field only for Version 2. */ + if (le32_to_cpu(buf->Version) != 2) { + cifs_dbg(VFS, "srv returned unsupported wsl symlink version %u\n", le32_to_cpu(buf->Version)); + return -EIO; + } + + /* Target for Version 2 is in UTF-8 but without trailing null-term byte */ + symname_utf8_len = len - data_offset; /* * Check that buffer does not contain null byte * because Linux cannot process symlink with null byte. */ - if (strnlen(buf->PathBuffer, symname_utf8_len) != symname_utf8_len) { + if (strnlen(buf->Target, symname_utf8_len) != symname_utf8_len) { cifs_dbg(VFS, "srv returned null byte in wsl symlink target location\n"); return -EIO; } symname_utf16 = kzalloc(symname_utf8_len * 2, GFP_KERNEL); if (!symname_utf16) return -ENOMEM; - symname_utf16_len = utf8s_to_utf16s(buf->PathBuffer, symname_utf8_len, + symname_utf16_len = utf8s_to_utf16s(buf->Target, symname_utf8_len, UTF16_LITTLE_ENDIAN, (wchar_t *) symname_utf16, symname_utf8_len * 2); if (symname_utf16_len < 0) { @@ -1062,8 +1069,6 @@ int parse_reparse_point(struct reparse_data_buffer *buf, const char *full_path, struct cifs_open_info_data *data) { - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - data->reparse.buf = buf; /* See MS-FSCC 2.1.2 */ @@ -1090,24 +1095,17 @@ int parse_reparse_point(struct reparse_data_buffer *buf, } return 0; default: - cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", - le32_to_cpu(buf->ReparseTag)); return -EOPNOTSUPP; } } -int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data) 
+struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, + u32 *plen) { - struct reparse_data_buffer *buf; struct smb2_ioctl_rsp *io = rsp_iov->iov_base; - u32 plen = le32_to_cpu(io->OutputCount); - - buf = (struct reparse_data_buffer *)((u8 *)io + - le32_to_cpu(io->OutputOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, data); + *plen = le32_to_cpu(io->OutputCount); + return (struct reparse_data_buffer *)((u8 *)io + + le32_to_cpu(io->OutputOffset)); } static bool wsl_to_fattr(struct cifs_open_info_data *data, @@ -1233,16 +1231,6 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, bool ok; switch (tag) { - case IO_REPARSE_TAG_INTERNAL: - if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) - return false; - fallthrough; - case IO_REPARSE_TAG_DFS: - case IO_REPARSE_TAG_DFSR: - case IO_REPARSE_TAG_MOUNT_POINT: - /* See cifs_create_junction_fattr() */ - fattr->cf_mode = S_IFDIR | 0711; - break; case IO_REPARSE_TAG_LX_SYMLINK: case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_AF_UNIX: @@ -1262,7 +1250,14 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, fattr->cf_mode |= S_IFLNK; break; default: - return false; + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + if (!IS_REPARSE_TAG_NAME_SURROGATE(tag) && + tag != IO_REPARSE_TAG_INTERNAL) + return false; + /* See cifs_create_junction_fattr() */ + fattr->cf_mode = S_IFDIR | 0711; + break; } fattr->cf_dtype = S_DT(fattr->cf_mode); diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index c0be5ab45a78a0..08de853b36a8a9 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -135,9 +135,6 @@ int smb2_create_reparse_symlink(const unsigned int xid, struct inode *inode, int smb2_mknod_reparse(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t dev); -int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct 
kvec *rsp_iov, - struct cifs_open_info_data *data); +struct reparse_data_buffer *smb2_get_reparse_point_buffer(const struct kvec *rsp_iov, u32 *len); #endif /* _CIFS_REPARSE_H */ diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index f2ca5963cd9d24..b3fa9ee2691272 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -680,6 +680,22 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) *pbcc_area = bcc_ptr; } +static void +ascii_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) +{ + char *bcc_ptr = *pbcc_area; + + strcpy(bcc_ptr, "Linux version "); + bcc_ptr += strlen("Linux version "); + strcpy(bcc_ptr, init_utsname()->release); + bcc_ptr += strlen(init_utsname()->release) + 1; + + strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); + bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; + + *pbcc_area = bcc_ptr; +} + static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, const struct nls_table *nls_cp) { @@ -704,6 +720,25 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, *pbcc_area = bcc_ptr; } +static void ascii_domain_string(char **pbcc_area, struct cifs_ses *ses, + const struct nls_table *nls_cp) +{ + char *bcc_ptr = *pbcc_area; + int len; + + /* copy domain */ + if (ses->domainName != NULL) { + len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + if (WARN_ON_ONCE(len < 0)) + len = CIFS_MAX_DOMAINNAME_LEN - 1; + bcc_ptr += len; + } /* else we send a null domain name so server will default to its own domain */ + *bcc_ptr = 0; + bcc_ptr++; + + *pbcc_area = bcc_ptr; +} + static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, const struct nls_table *nls_cp) { @@ -749,25 +784,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ - /* copy domain */ - if (ses->domainName != NULL) { - len = strscpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); - if (WARN_ON_ONCE(len < 0)) - 
len = CIFS_MAX_DOMAINNAME_LEN - 1; - bcc_ptr += len; - } /* else we send a null domain name so server will default to its own domain */ - *bcc_ptr = 0; - bcc_ptr++; - /* BB check for overflow here */ - strcpy(bcc_ptr, "Linux version "); - bcc_ptr += strlen("Linux version "); - strcpy(bcc_ptr, init_utsname()->release); - bcc_ptr += strlen(init_utsname()->release) + 1; - - strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); - bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1; + ascii_domain_string(&bcc_ptr, ses, nls_cp); + ascii_oslm_strings(&bcc_ptr, nls_cp); *pbcc_area = bcc_ptr; } @@ -1570,7 +1590,7 @@ sess_auth_kerberos(struct sess_data *sess_data) sess_data->iov[1].iov_len = msg->secblob_len; pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len); - if (ses->capabilities & CAP_UNICODE) { + if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) { /* unicode strings must be word aligned */ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) { *bcc_ptr = 0; @@ -1579,8 +1599,8 @@ sess_auth_kerberos(struct sess_data *sess_data) unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp); unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp); } else { - /* BB: is this right? 
*/ - ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp); + ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp); + ascii_domain_string(&bcc_ptr, ses, sess_data->nls_cp); } sess_data->iov[2].iov_len = (long) bcc_ptr - diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 26df807fbe7a9a..b27a182629ece9 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -432,7 +432,7 @@ cifs_negotiate(const unsigned int xid, } static unsigned int -cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -467,7 +467,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) } static unsigned int -cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) +smb1_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; @@ -543,24 +543,104 @@ static int cifs_query_path_info(const unsigned int xid, const char *full_path, struct cifs_open_info_data *data) { - int rc; + int rc = -EOPNOTSUPP; FILE_ALL_INFO fi = {}; + struct cifs_search_info search_info = {}; + bool non_unicode_wildcard = false; data->reparse_point = false; data->adjust_tz = false; - /* could do find first instead but this returns more info */ - rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, cifs_sb->local_nls, - cifs_remap(cifs_sb)); /* - * BB optimize code so we do not make the above call when server claims - * no NT SMB support and the above call failed at least once - set flag - * in tcon or mount. + * First try CIFSSMBQPathInfo() function which returns more info + * (NumberOfLinks) than CIFSFindFirst() fallback function. 
+ * Some servers like Win9x do not support SMB_QUERY_FILE_ALL_INFO over + * TRANS2_QUERY_PATH_INFORMATION, but supports it with filehandle over + * TRANS2_QUERY_FILE_INFORMATION (function CIFSSMBQFileInfo()). But SMB + * Open command on non-NT servers works only for files, does not work + * for directories. And moreover Win9x SMB server returns bogus data in + * SMB_QUERY_FILE_ALL_INFO Attributes field. So for non-NT servers, + * do not even use CIFSSMBQPathInfo() or CIFSSMBQFileInfo() function. + */ + if (tcon->ses->capabilities & CAP_NT_SMBS) + rc = CIFSSMBQPathInfo(xid, tcon, full_path, &fi, 0 /* not legacy */, + cifs_sb->local_nls, cifs_remap(cifs_sb)); + + /* + * Non-UNICODE variant of fallback functions below expands wildcards, + * so they cannot be used for querying paths with wildcard characters. + */ + if (rc && !(tcon->ses->capabilities & CAP_UNICODE) && strpbrk(full_path, "*?\"><")) + non_unicode_wildcard = true; + + /* + * Then fallback to CIFSFindFirst() which works also with non-NT servers + * but does not provide NumberOfLinks. 
+ */ + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && + !non_unicode_wildcard) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) + search_info.info_level = SMB_FIND_FILE_INFO_STANDARD; + else + search_info.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; + rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb, NULL, + CIFS_SEARCH_CLOSE_ALWAYS | CIFS_SEARCH_CLOSE_AT_END, + &search_info, false); + if (rc == 0) { + if (!(tcon->ses->capabilities & tcon->ses->server->vals->cap_nt_find)) { + FIND_FILE_STANDARD_INFO *di; + int offset = tcon->ses->server->timeAdj; + + di = (FIND_FILE_STANDARD_INFO *)search_info.srch_entries_start; + fi.CreationTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->CreationDate, di->CreationTime, offset))); + fi.LastAccessTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastAccessDate, di->LastAccessTime, offset))); + fi.LastWriteTime = cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm( + di->LastWriteDate, di->LastWriteTime, offset))); + fi.ChangeTime = fi.LastWriteTime; + fi.Attributes = cpu_to_le32(le16_to_cpu(di->Attributes)); + fi.AllocationSize = cpu_to_le64(le32_to_cpu(di->AllocationSize)); + fi.EndOfFile = cpu_to_le64(le32_to_cpu(di->DataSize)); + } else { + FILE_FULL_DIRECTORY_INFO *di; + + di = (FILE_FULL_DIRECTORY_INFO *)search_info.srch_entries_start; + fi.CreationTime = di->CreationTime; + fi.LastAccessTime = di->LastAccessTime; + fi.LastWriteTime = di->LastWriteTime; + fi.ChangeTime = di->ChangeTime; + fi.Attributes = di->ExtFileAttributes; + fi.AllocationSize = di->AllocationSize; + fi.EndOfFile = di->EndOfFile; + fi.EASize = di->EaSize; + } + fi.NumberOfLinks = cpu_to_le32(1); + fi.DeletePending = 0; + fi.Directory = !!(le32_to_cpu(fi.Attributes) & ATTR_DIRECTORY); + cifs_buf_release(search_info.ntwrk_buf_start); + } else if (!full_path[0]) { + /* + * CIFSFindFirst() does not work on root path if the + * root path was exported on the server from the top + * level path (drive letter). 
+ */ + rc = -EOPNOTSUPP; + } + } + + /* + * If everything failed then fallback to the legacy SMB command + * SMB_COM_QUERY_INFORMATION which works with all servers, but + * provide just few information. */ - if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { + if ((rc == -EOPNOTSUPP || rc == -EINVAL) && !non_unicode_wildcard) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); data->adjust_tz = true; + } else if ((rc == -EOPNOTSUPP || rc == -EINVAL) && non_unicode_wildcard) { + /* Path with non-UNICODE wildcard character cannot exist. */ + rc = -ENOENT; } if (!rc) { @@ -568,6 +648,42 @@ static int cifs_query_path_info(const unsigned int xid, data->reparse_point = le32_to_cpu(fi.Attributes) & ATTR_REPARSE; } +#ifdef CONFIG_CIFS_XATTR + /* + * For WSL CHR and BLK reparse points it is required to fetch + * EA $LXDEV which contains major and minor device numbers. + */ + if (!rc && data->reparse_point) { + struct smb2_file_full_ea_info *ea; + + ea = (struct smb2_file_full_ea_info *)data->wsl.eas; + rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV, + &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1], + SMB2_WSL_XATTR_DEV_SIZE, cifs_sb); + if (rc == SMB2_WSL_XATTR_DEV_SIZE) { + ea->next_entry_offset = cpu_to_le32(0); + ea->flags = 0; + ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN; + ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE); + memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1); + data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 + + SMB2_WSL_XATTR_DEV_SIZE; + rc = 0; + } else if (rc >= 0) { + /* It is an error if EA $LXDEV has wrong size. */ + rc = -EINVAL; + } else { + /* + * In all other cases ignore error if fetching + * of EA $LXDEV failed. It is needed only for + * WSL CHR and BLK reparse points and wsl_to_fattr() + * handle the case when EA is missing. 
+ */ + rc = 0; + } + } +#endif + return rc; } @@ -603,6 +719,13 @@ static int cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; FILE_ALL_INFO fi = {}; + /* + * CIFSSMBQFileInfo() for non-NT servers returns bogus data in + * Attributes fields. So do not use this command for non-NT servers. + */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS)) + return -EOPNOTSUPP; + if (cfile->symlink_target) { data->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!data->symlink_target) @@ -773,6 +896,9 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifsFileInfo *open_file; + FILE_BASIC_INFO new_buf; + struct cifs_open_info_data query_data; + __le64 write_time = buf->LastWriteTime; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = NULL; @@ -780,20 +906,58 @@ smb_set_file_info(struct inode *inode, const char *full_path, /* if the file is already open for write, just use that fileid */ open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; tcon = tlink_tcon(open_file->tlink); - goto set_via_filehandle; + } else { + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + tlink = NULL; + goto out; + } + tcon = tlink_tcon(tlink); } - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + /* + * Non-NT servers interprets zero time value in SMB_SET_FILE_BASIC_INFO + * over TRANS2_SET_FILE_INFORMATION as a valid time value. NT servers + * interprets zero time value as do not change existing value on server. + * API of ->set_file_info() callback expects that zero time value has + * the NT meaning - do not change. Therefore if server is non-NT and + * some time values in "buf" are zero, then fetch missing time values. 
+ */ + if (!(tcon->ses->capabilities & CAP_NT_SMBS) && + (!buf->CreationTime || !buf->LastAccessTime || + !buf->LastWriteTime || !buf->ChangeTime)) { + rc = cifs_query_path_info(xid, tcon, cifs_sb, full_path, &query_data); + if (rc) { + if (open_file) { + cifsFileInfo_put(open_file); + open_file = NULL; + } + goto out; + } + /* + * Original write_time from buf->LastWriteTime is preserved + * as SMBSetInformation() interprets zero as do not change. + */ + new_buf = *buf; + buf = &new_buf; + if (!buf->CreationTime) + buf->CreationTime = query_data.fi.CreationTime; + if (!buf->LastAccessTime) + buf->LastAccessTime = query_data.fi.LastAccessTime; + if (!buf->LastWriteTime) + buf->LastWriteTime = query_data.fi.LastWriteTime; + if (!buf->ChangeTime) + buf->ChangeTime = query_data.fi.ChangeTime; } - tcon = tlink_tcon(tlink); + + if (open_file) + goto set_via_filehandle; rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf, cifs_sb->local_nls, cifs_sb); @@ -814,8 +978,45 @@ smb_set_file_info(struct inode *inode, const char *full_path, .fid = &fid, }; - cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for times not supported by this server\n"); - rc = CIFS_open(xid, &oparms, &oplock, NULL); + if (S_ISDIR(inode->i_mode) && !(tcon->ses->capabilities & CAP_NT_SMBS)) { + /* Opening directory path is not possible on non-NT servers. */ + rc = -EOPNOTSUPP; + } else { + /* + * Use cifs_open_file() instead of CIFS_open() as the + * cifs_open_file() selects the correct function which + * works also on non-NT servers. + */ + rc = cifs_open_file(xid, &oparms, &oplock, NULL); + /* + * Opening path for writing on non-NT servers is not + * possible when the read-only attribute is already set. + * Non-NT server in this case returns -EACCES. For those + * servers the only possible way how to clear the read-only + * bit is via SMB_COM_SETATTR command. 
+ */ + if (rc == -EACCES && + (cinode->cifsAttrs & ATTR_READONLY) && + le32_to_cpu(buf->Attributes) != 0 && /* 0 = do not change attrs */ + !(le32_to_cpu(buf->Attributes) & ATTR_READONLY) && + !(tcon->ses->capabilities & CAP_NT_SMBS)) + rc = -EOPNOTSUPP; + } + + /* Fallback to SMB_COM_SETATTR command when absolutely needed. */ + if (rc == -EOPNOTSUPP) { + cifs_dbg(FYI, "calling SetInformation since SetPathInfo for attrs/times not supported by this server\n"); + rc = SMBSetInformation(xid, tcon, full_path, + buf->Attributes != 0 ? buf->Attributes : cpu_to_le32(cinode->cifsAttrs), + write_time, + cifs_sb->local_nls, cifs_sb); + if (rc == 0) + cinode->cifsAttrs = le32_to_cpu(buf->Attributes); + else + rc = -EACCES; + goto out; + } + if (rc != 0) { if (rc == -EIO) rc = -EINVAL; @@ -823,6 +1024,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, } netpid = current->tgid; + cifs_dbg(FYI, "calling SetFileInfo since SetPathInfo for attrs/times not supported by this server\n"); set_via_filehandle: rc = CIFSSMBSetFileInfo(xid, tcon, buf, fid.netfid, netpid); @@ -833,6 +1035,21 @@ smb_set_file_info(struct inode *inode, const char *full_path, CIFSSMBClose(xid, tcon, fid.netfid); else cifsFileInfo_put(open_file); + + /* + * Setting the read-only bit is not honored on non-NT servers when done + * via open-semantics. So for setting it, use SMB_COM_SETATTR command. + * This command works only after the file is closed, so use it only when + * operation was called without the filehandle. 
+ */ + if (open_file == NULL && + !(tcon->ses->capabilities & CAP_NT_SMBS) && + le32_to_cpu(buf->Attributes) & ATTR_READONLY) { + SMBSetInformation(xid, tcon, full_path, + buf->Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); + } out: if (tlink != NULL) cifs_put_tlink(tlink); @@ -970,18 +1187,13 @@ static int cifs_query_symlink(const unsigned int xid, return rc; } -static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb, - const char *full_path, - struct kvec *rsp_iov, - struct cifs_open_info_data *data) +static struct reparse_data_buffer *cifs_get_reparse_point_buffer(const struct kvec *rsp_iov, + u32 *plen) { - struct reparse_data_buffer *buf; TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base; - u32 plen = le16_to_cpu(io->ByteCount); - - buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + - le32_to_cpu(io->DataOffset)); - return parse_reparse_point(buf, plen, cifs_sb, full_path, data); + *plen = le16_to_cpu(io->ByteCount); + return (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol + + le32_to_cpu(io->DataOffset)); } static bool @@ -1130,8 +1342,8 @@ struct smb_version_operations smb1_operations = { .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, - .negotiate_wsize = cifs_negotiate_wsize, - .negotiate_rsize = cifs_negotiate_rsize, + .negotiate_wsize = smb1_negotiate_wsize, + .negotiate_rsize = smb1_negotiate_rsize, .sess_setup = CIFS_SessSetup, .logoff = CIFSSMBLogoff, .tree_connect = CIFSTCon, @@ -1157,7 +1369,7 @@ struct smb_version_operations smb1_operations = { .rename = CIFSSMBRename, .create_hardlink = CIFSCreateHardLink, .query_symlink = cifs_query_symlink, - .parse_reparse_point = cifs_parse_reparse_point, + .get_reparse_point_buffer = cifs_get_reparse_point_buffer, .open = cifs_open_file, .set_fid = cifs_set_fid, .close = cifs_close_file, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 57d9bfbadd97b2..2a3e46b8e15af6 100644 --- 
a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -666,6 +666,8 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, /* smb2_parse_contexts() fills idata->fi.IndexNumber */ rc = smb2_parse_contexts(server, &rsp_iov[0], &oparms->fid->epoch, oparms->fid->lease_key, &oplock, &idata->fi, NULL); + if (rc) + cifs_dbg(VFS, "rc: %d parsing context of compound op\n", rc); } for (i = 0; i < num_cmds; i++) { diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 41d8cd20b25f88..2fe8eeb9853563 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4555,9 +4555,9 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf, return rc; } } else { - if (unlikely(!server->secmech.dec)) - return -EIO; - + rc = smb3_crypto_aead_allocate(server); + if (unlikely(rc)) + return rc; tfm = server->secmech.dec; } @@ -5303,7 +5303,7 @@ struct smb_version_operations smb20_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5406,7 +5406,7 @@ struct smb_version_operations smb21_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5513,7 +5513,7 @@ struct smb_version_operations smb30_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, 
.query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, @@ -5629,7 +5629,7 @@ struct smb_version_operations smb311_operations = { .unlink = smb2_unlink, .rename = smb2_rename_path, .create_hardlink = smb2_create_hardlink, - .parse_reparse_point = smb2_parse_reparse_point, + .get_reparse_point_buffer = smb2_get_reparse_point_buffer, .query_mf_symlink = smb3_query_mf_symlink, .create_mf_symlink = smb3_create_mf_symlink, .create_reparse_symlink = smb2_create_reparse_symlink, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 81e05db8e4d5a1..399185ca7cacb0 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1252,15 +1252,8 @@ SMB2_negotiate(const unsigned int xid, cifs_server_dbg(VFS, "Missing expected negotiate contexts\n"); } - if (server->cipher_type && !rc) { - if (!SERVER_IS_CHAN(server)) { - rc = smb3_crypto_aead_allocate(server); - } else { - /* For channels, just reuse the primary server crypto secmech. 
*/ - server->secmech.enc = server->primary_server->secmech.enc; - server->secmech.dec = server->primary_server->secmech.dec; - } - } + if (server->cipher_type && !rc) + rc = smb3_crypto_aead_allocate(server); neg_exit: free_rsp_buf(resp_buftype, rsp); return rc; @@ -2928,6 +2921,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, req->CreateContextsOffset = cpu_to_le32( sizeof(struct smb2_create_req) + iov[1].iov_len); + le32_add_cpu(&req->CreateContextsLength, iov[n_iov-1].iov_len); pc_buf = iov[n_iov-1].iov_base; } @@ -2974,7 +2968,7 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode, /* Eventually save off posix specific response info and timestamps */ err_free_rsp_buf: - free_rsp_buf(resp_buftype, rsp); + free_rsp_buf(resp_buftype, rsp_iov.iov_base); kfree(pc_buf); err_free_req: cifs_small_buf_release(req); @@ -4099,12 +4093,8 @@ static void cifs_renegotiate_iosize(struct TCP_Server_Info *server, return; spin_lock(&tcon->sb_list_lock); - list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) { - cifs_sb->ctx->rsize = - server->ops->negotiate_rsize(tcon, cifs_sb->ctx); - cifs_sb->ctx->wsize = - server->ops->negotiate_wsize(tcon, cifs_sb->ctx); - } + list_for_each_entry(cifs_sb, &tcon->cifs_sb_list, tcon_sb_link) + cifs_negotiate_iosize(server, cifs_sb->ctx, tcon); spin_unlock(&tcon->sb_list_lock); } @@ -4898,7 +4888,7 @@ smb2_writev_callback(struct mid_q_entry *mid) 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); - cifs_write_subrequest_terminated(wdata, result ?: written, true); + cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, @@ -5071,7 +5061,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) -(int)wdata->credits.value, cifs_trace_rw_credits_write_response_clear); 
add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, true); + cifs_write_subrequest_terminated(wdata, rc); } } diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 764dca80c15cb1..f79a5165a7cc6a 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1567,13 +1567,13 @@ struct reparse_nfs_data_buffer { __u8 DataBuffer[]; } __packed; -/* For IO_REPARSE_TAG_LX_SYMLINK */ +/* For IO_REPARSE_TAG_LX_SYMLINK - see MS-FSCC 2.1.2.7 */ struct reparse_wsl_symlink_data_buffer { __le32 ReparseTag; __le16 ReparseDataLength; __u16 Reserved; - __le32 Flags; - __u8 PathBuffer[]; /* Variable Length UTF-8 string without nul-term */ + __le32 Version; /* Always 2 */ + __u8 Target[]; /* Variable Length UTF-8 string without nul-term */ } __packed; struct validate_negotiate_info_req { diff --git a/fs/smb/server/auth.c b/fs/smb/server/auth.c index 83caa384974932..b3d121052408cc 100644 --- a/fs/smb/server/auth.c +++ b/fs/smb/server/auth.c @@ -550,7 +550,19 @@ int ksmbd_krb5_authenticate(struct ksmbd_session *sess, char *in_blob, retval = -ENOMEM; goto out; } - sess->user = user; + + if (!sess->user) { + /* First successful authentication */ + sess->user = user; + } else { + if (!ksmbd_compare_user(sess->user, user)) { + ksmbd_debug(AUTH, "different user tried to reuse session\n"); + retval = -EPERM; + ksmbd_free_user(user); + goto out; + } + ksmbd_free_user(user); + } memcpy(sess->sess_key, resp->payload, resp->session_key_len); memcpy(out_blob, resp->payload + resp->session_key_len, diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index c1f22c12911179..83764c230e9d4c 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -39,8 +39,10 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) xa_destroy(&conn->sessions); kvfree(conn->request_buf); kfree(conn->preauth_info); - if (atomic_dec_and_test(&conn->refcnt)) + if (atomic_dec_and_test(&conn->refcnt)) { + 
ksmbd_free_transport(conn->transport); kfree(conn); + } } /** diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 3f45f28f6f0f8e..9dec4c2940bc04 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -59,10 +59,12 @@ static void ksmbd_session_rpc_clear_list(struct ksmbd_session *sess) struct ksmbd_session_rpc *entry; long index; + down_write(&sess->rpc_lock); xa_for_each(&sess->rpc_handle_list, index, entry) { xa_erase(&sess->rpc_handle_list, index); __session_rpc_close(sess, entry); } + up_write(&sess->rpc_lock); xa_destroy(&sess->rpc_handle_list); } @@ -92,7 +94,7 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) { struct ksmbd_session_rpc *entry, *old; struct ksmbd_rpc_command *resp; - int method; + int method, id; method = __rpc_method(rpc_name); if (!method) @@ -102,26 +104,29 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name) if (!entry) return -ENOMEM; + down_read(&sess->rpc_lock); entry->method = method; - entry->id = ksmbd_ipc_id_alloc(); - if (entry->id < 0) + entry->id = id = ksmbd_ipc_id_alloc(); + if (id < 0) goto free_entry; - old = xa_store(&sess->rpc_handle_list, entry->id, entry, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->rpc_handle_list, id, entry, KSMBD_DEFAULT_GFP); if (xa_is_err(old)) goto free_id; - resp = ksmbd_rpc_open(sess, entry->id); + resp = ksmbd_rpc_open(sess, id); if (!resp) goto erase_xa; + up_read(&sess->rpc_lock); kvfree(resp); - return entry->id; + return id; erase_xa: xa_erase(&sess->rpc_handle_list, entry->id); free_id: ksmbd_rpc_id_free(entry->id); free_entry: kfree(entry); + up_read(&sess->rpc_lock); return -EINVAL; } @@ -129,9 +134,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id) { struct ksmbd_session_rpc *entry; + down_write(&sess->rpc_lock); entry = xa_erase(&sess->rpc_handle_list, id); if (entry) __session_rpc_close(sess, entry); + up_write(&sess->rpc_lock); } int 
ksmbd_session_rpc_method(struct ksmbd_session *sess, int id) @@ -439,6 +446,7 @@ static struct ksmbd_session *__session_create(int protocol) sess->sequence_number = 1; rwlock_init(&sess->tree_conns_lock); atomic_set(&sess->refcnt, 2); + init_rwsem(&sess->rpc_lock); ret = __init_smb2_session(sess); if (ret) diff --git a/fs/smb/server/mgmt/user_session.h b/fs/smb/server/mgmt/user_session.h index f21348381d5984..c5749d6ec7151c 100644 --- a/fs/smb/server/mgmt/user_session.h +++ b/fs/smb/server/mgmt/user_session.h @@ -63,6 +63,7 @@ struct ksmbd_session { rwlock_t tree_conns_lock; atomic_t refcnt; + struct rw_semaphore rpc_lock; }; static inline int test_session_flag(struct ksmbd_session *sess, int bit) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index f103b1bd040040..d7a8a580d01362 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -129,14 +129,6 @@ static void free_opinfo(struct oplock_info *opinfo) kfree(opinfo); } -static inline void opinfo_free_rcu(struct rcu_head *rcu_head) -{ - struct oplock_info *opinfo; - - opinfo = container_of(rcu_head, struct oplock_info, rcu_head); - free_opinfo(opinfo); -} - struct oplock_info *opinfo_get(struct ksmbd_file *fp) { struct oplock_info *opinfo; @@ -154,12 +146,9 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) { struct oplock_info *opinfo; - if (list_empty(&ci->m_op_list)) - return NULL; - - rcu_read_lock(); - opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info, - op_entry); + down_read(&ci->m_lock); + opinfo = list_first_entry_or_null(&ci->m_op_list, struct oplock_info, + op_entry); if (opinfo) { if (opinfo->conn == NULL || !atomic_inc_not_zero(&opinfo->refcount)) @@ -171,8 +160,7 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) } } } - - rcu_read_unlock(); + up_read(&ci->m_lock); return opinfo; } @@ -185,7 +173,7 @@ void opinfo_put(struct oplock_info *opinfo) if (!atomic_dec_and_test(&opinfo->refcount)) return; - 
call_rcu(&opinfo->rcu_head, opinfo_free_rcu); + free_opinfo(opinfo); } static void opinfo_add(struct oplock_info *opinfo) @@ -193,7 +181,7 @@ static void opinfo_add(struct oplock_info *opinfo) struct ksmbd_inode *ci = opinfo->o_fp->f_ci; down_write(&ci->m_lock); - list_add_rcu(&opinfo->op_entry, &ci->m_op_list); + list_add(&opinfo->op_entry, &ci->m_op_list); up_write(&ci->m_lock); } @@ -207,7 +195,7 @@ static void opinfo_del(struct oplock_info *opinfo) write_unlock(&lease_list_lock); } down_write(&ci->m_lock); - list_del_rcu(&opinfo->op_entry); + list_del(&opinfo->op_entry); up_write(&ci->m_lock); } @@ -1347,8 +1335,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, ci = fp->f_ci; op = opinfo_get(fp); - rcu_read_lock(); - list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) { + down_read(&ci->m_lock); + list_for_each_entry(brk_op, &ci->m_op_list, op_entry) { if (brk_op->conn == NULL) continue; @@ -1358,7 +1346,6 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, if (ksmbd_conn_releasing(brk_op->conn)) continue; - rcu_read_unlock(); if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | SMB2_LEASE_HANDLE_CACHING_LE)))) { @@ -1388,9 +1375,8 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE, NULL); next: opinfo_put(brk_op); - rcu_read_lock(); } - rcu_read_unlock(); + up_read(&ci->m_lock); if (op) opinfo_put(op); @@ -1507,7 +1493,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease_v2) - 4) - return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1523,7 +1509,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < sizeof(struct create_lease)) 
- return NULL; + goto err_out; memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; @@ -1532,6 +1518,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req) lreq->version = 1; } return lreq; +err_out: + kfree(lreq); + return NULL; } /** diff --git a/fs/smb/server/oplock.h b/fs/smb/server/oplock.h index 3f64f07872638e..9a56eaadd0dd8f 100644 --- a/fs/smb/server/oplock.h +++ b/fs/smb/server/oplock.h @@ -71,7 +71,6 @@ struct oplock_info { struct list_head lease_entry; wait_queue_head_t oplock_q; /* Other server threads */ wait_queue_head_t oplock_brk; /* oplock breaking wait */ - struct rcu_head rcu_head; }; struct lease_break_info { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index d24d95d15d876b..f2a2be8467c669 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -633,6 +633,11 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\0') { + kfree(name); + return ERR_PTR(-EINVAL); + } + if (*name == '\\') { pr_err("not allow directory name included leading slash\n"); kfree(name); @@ -1445,7 +1450,7 @@ static int ntlm_authenticate(struct ksmbd_work *work, { struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; - struct channel *chann = NULL; + struct channel *chann = NULL, *old; struct ksmbd_user *user; u64 prev_id; int sz, rc; @@ -1557,7 +1562,12 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; chann->conn = conn; - xa_store(&sess->ksmbd_chann_list, (long)conn, chann, KSMBD_DEFAULT_GFP); + old = xa_store(&sess->ksmbd_chann_list, (long)conn, chann, + KSMBD_DEFAULT_GFP); + if (xa_is_err(old)) { + kfree(chann); + return xa_err(old); + } } } @@ -1602,9 +1612,6 @@ static int krb5_authenticate(struct ksmbd_work *work, if (prev_sess_id && prev_sess_id != sess->id) destroy_previous_session(conn, sess->user, prev_sess_id); - if (sess->state == SMB2_SESSION_VALID) - 
ksmbd_free_user(sess->user); - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { @@ -2247,10 +2254,6 @@ int smb2_session_logoff(struct ksmbd_work *work) sess->state = SMB2_SESSION_EXPIRED; up_write(&conn->session_lock); - if (sess->user) { - ksmbd_free_user(sess->user); - sess->user = NULL; - } ksmbd_all_conn_set_status(sess_id, KSMBD_SESS_NEED_SETUP); rsp->StructureSize = cpu_to_le16(4); diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index a3d8a905b07e07..d742ba754348bb 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -72,6 +72,8 @@ #define FILE_SUPPORTS_ENCRYPTION 0x00020000 #define FILE_SUPPORTS_OBJECT_IDS 0x00010000 #define FILE_VOLUME_IS_COMPRESSED 0x00008000 +#define FILE_SUPPORTS_POSIX_UNLINK_RENAME 0x00000400 +#define FILE_RETURNS_CLEANUP_RESULT_INFO 0x00000200 #define FILE_SUPPORTS_REMOTE_STORAGE 0x00000100 #define FILE_SUPPORTS_REPARSE_POINTS 0x00000080 #define FILE_SUPPORTS_SPARSE_FILES 0x00000040 diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 3f185ae60dc514..2a3e2b0ce5570a 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -310,7 +310,11 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) server_conf.signing = req->signing; server_conf.tcp_port = req->tcp_port; server_conf.ipc_timeout = req->ipc_timeout * HZ; - server_conf.deadtime = req->deadtime * SMB_ECHO_INTERVAL; + if (check_mul_overflow(req->deadtime, SMB_ECHO_INTERVAL, + &server_conf.deadtime)) { + ret = -EINVAL; + goto out; + } server_conf.share_fake_fscaps = req->share_fake_fscaps; ksmbd_init_domain(req->sub_auth); @@ -337,6 +341,7 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) server_conf.bind_interfaces_only = req->bind_interfaces_only; ret |= ksmbd_tcp_set_interfaces(KSMBD_STARTUP_CONFIG_INTERFACES(req), req->ifc_list_sz); +out: if (ret) { pr_err("Server configuration error: %s %s 
%s\n", req->netbios_name, req->server_string, diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 7f38a3c3f5bd69..abedf510899a74 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,17 +93,21 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -static void free_transport(struct tcp_transport *t) +void ksmbd_free_transport(struct ksmbd_transport *kt) { - kernel_sock_shutdown(t->sock, SHUT_RDWR); - sock_release(t->sock); - t->sock = NULL; + struct tcp_transport *t = TCP_TRANS(kt); - ksmbd_conn_free(KSMBD_TRANS(t)->conn); + sock_release(t->sock); kfree(t->iov); kfree(t); } +static void free_transport(struct tcp_transport *t) +{ + kernel_sock_shutdown(t->sock, SHUT_RDWR); + ksmbd_conn_free(KSMBD_TRANS(t)->conn); +} + /** * kvec_array_init() - initialize a IO vector segment * @new: IO vector to be initialized diff --git a/fs/smb/server/transport_tcp.h b/fs/smb/server/transport_tcp.h index 8c9aa624cfe3ca..1e51675ee1b209 100644 --- a/fs/smb/server/transport_tcp.h +++ b/fs/smb/server/transport_tcp.h @@ -8,6 +8,7 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz); struct interface *ksmbd_find_netdev_name_iface_list(char *netdev_name); +void ksmbd_free_transport(struct ksmbd_transport *kt); int ksmbd_tcp_init(void); void ksmbd_tcp_destroy(void); diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 8554aa5a1059fd..baf0d3031a44a0 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -409,10 +409,15 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n", *pos, count); + if (*pos >= XATTR_SIZE_MAX) { + pr_err("stream write position %lld is out of bounds\n", *pos); + return -EINVAL; + } + size = *pos + count; if (size > XATTR_SIZE_MAX) { size = XATTR_SIZE_MAX; - count = (*pos + count) - XATTR_SIZE_MAX; + count = XATTR_SIZE_MAX - *pos; } v_len = 
ksmbd_vfs_getcasexattr(idmap, @@ -479,7 +484,8 @@ int ksmbd_vfs_write(struct ksmbd_work *work, struct ksmbd_file *fp, int err = 0; if (work->conn->connection_type) { - if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE))) { + if (!(fp->daccess & (FILE_WRITE_DATA_LE | FILE_APPEND_DATA_LE)) || + S_ISDIR(file_inode(fp->filp)->i_mode)) { pr_err("no right to write(%pD)\n", fp->filp); err = -EACCES; goto out; @@ -676,7 +682,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path, struct ksmbd_file *parent_fp; int new_type; int err, lookup_flags = LOOKUP_NO_SYMLINKS; - int target_lookup_flags = LOOKUP_RENAME_TARGET; + int target_lookup_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; if (ksmbd_override_fsids(work)) return -ENOMEM; diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 8d1f30dcba7e8e..dfed6fce890498 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -661,21 +661,40 @@ __close_file_table_ids(struct ksmbd_file_table *ft, bool (*skip)(struct ksmbd_tree_connect *tcon, struct ksmbd_file *fp)) { - unsigned int id; - struct ksmbd_file *fp; - int num = 0; + struct ksmbd_file *fp; + unsigned int id = 0; + int num = 0; + + while (1) { + write_lock(&ft->lock); + fp = idr_get_next(ft->idr, &id); + if (!fp) { + write_unlock(&ft->lock); + break; + } - idr_for_each_entry(ft->idr, fp, id) { - if (skip(tcon, fp)) + if (skip(tcon, fp) || + !atomic_dec_and_test(&fp->refcount)) { + id++; + write_unlock(&ft->lock); continue; + } set_close_state_blocked_works(fp); + idr_remove(ft->idr, fp->volatile_id); + fp->volatile_id = KSMBD_NO_FID; + write_unlock(&ft->lock); + + down_write(&fp->f_ci->m_lock); + list_del_init(&fp->node); + up_write(&fp->f_ci->m_lock); - if (!atomic_dec_and_test(&fp->refcount)) - continue; __ksmbd_close_fd(ft, fp); + num++; + id++; } + return num; } @@ -713,12 +732,8 @@ static bool tree_conn_fd_check(struct ksmbd_tree_connect *tcon, static bool ksmbd_durable_scavenger_alive(void) { - 
mutex_lock(&durable_scavenger_lock); - if (!durable_scavenger_running) { - mutex_unlock(&durable_scavenger_lock); + if (!durable_scavenger_running) return false; - } - mutex_unlock(&durable_scavenger_lock); if (kthread_should_stop()) return false; @@ -799,9 +814,7 @@ static int ksmbd_durable_scavenger(void *dummy) break; } - mutex_lock(&durable_scavenger_lock); durable_scavenger_running = false; - mutex_unlock(&durable_scavenger_lock); module_put(THIS_MODULE); diff --git a/fs/splice.c b/fs/splice.c index 90d464241f151c..4d6df083e0c06a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -45,7 +45,7 @@ * here if set to avoid blocking other users of this pipe if splice is * being done on it. */ -static noinline void noinline pipe_clear_nowait(struct file *file) +static noinline void pipe_clear_nowait(struct file *file) { fmode_t fmode = READ_ONCE(file->f_mode); diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 67c55fe32ce88d..992ea0e372572f 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); + if (!msblk->devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); diff --git a/fs/stat.c b/fs/stat.c index f13308bfdc983a..3d9222807214a7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -204,12 +204,25 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, STATX_ATTR_DAX); idmap = mnt_idmap(path->mnt); - if (inode->i_op->getattr) - return inode->i_op->getattr(idmap, path, stat, - request_mask, - query_flags); + if (inode->i_op->getattr) { + int ret; + + ret = inode->i_op->getattr(idmap, path, stat, request_mask, + query_flags); + if (ret) + return ret; + } else { + generic_fillattr(idmap, request_mask, inode, stat); 
+ } + + /* + * If this is a block device inode, override the filesystem attributes + * with the block device specific parameters that need to be obtained + * from the bdev backing inode. + */ + if (S_ISBLK(stat->mode)) + bdev_statx(path, stat, request_mask); - generic_fillattr(idmap, request_mask, inode, stat); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); @@ -295,15 +308,6 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, if (path_mounted(path)) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; - - /* - * If this is a block device inode, override the filesystem - * attributes with the block device specific parameters that need to be - * obtained from the bdev backing inode. - */ - if (S_ISBLK(stat->mode)) - bdev_statx(path, stat, request_mask); - return 0; } diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 4f33a4a4888613..b4071c9cf8c951 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -115,7 +115,7 @@ void udf_truncate_tail_extent(struct inode *inode) } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ - if (ret == 0) + if (ret >= 0) iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index d80f943461992f..22f4bf956ba1c4 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_copy->copy))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_copy, user_uffdio_copy, @@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if 
(unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage, @@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_continue->mapped))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_continue, user_uffdio_continue, @@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long user_uffdio_poison = (struct uffdio_poison __user *)arg; ret = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_poison->updated))) + return -EFAULT; goto out; + } ret = -EFAULT; if (copy_from_user(&uffdio_poison, user_uffdio_poison, @@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx, user_uffdio_move = (struct uffdio_move __user *) arg; - if (atomic_read(&ctx->mmap_changing)) - return -EAGAIN; + ret = -EAGAIN; + if (unlikely(atomic_read(&ctx->mmap_changing))) { + if (unlikely(put_user(ret, &user_uffdio_move->move))) + return -EFAULT; + goto out; + } if (copy_from_user(&uffdio_move, user_uffdio_move, /* don't copy "move" last field */ diff --git a/fs/xattr.c b/fs/xattr.c index 02bee149ad9674..8ec5b0204bfdc5 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -703,7 +703,7 @@ static int path_setxattrat(int dfd, const char __user *pathname, return error; filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) error = -EBADF; @@ -847,7 +847,7 @@ static ssize_t path_getxattrat(int dfd, const char __user *pathname, return error; 
filename = getname_maybe_null(pathname, at_flags); - if (!filename) { + if (!filename && dfd >= 0) { CLASS(fd, f)(dfd); if (fd_empty(f)) return -EBADF; @@ -1428,6 +1428,15 @@ static bool xattr_is_trusted(const char *name) return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } +static bool xattr_is_maclabel(const char *name) +{ + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + return !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) && + security_ismaclabel(suffix); +} + /** * simple_xattr_list - list all xattr objects * @inode: inode from which to get the xattrs @@ -1460,6 +1469,17 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (err) return err; + err = security_inode_listsecurity(inode, buffer, remaining_size); + if (err < 0) + return err; + + if (buffer) { + if (remaining_size < err) + return -ERANGE; + buffer += err; + } + remaining_size -= err; + read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { xattr = rb_entry(rbp, struct simple_xattr, rb_node); @@ -1468,6 +1488,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (!trusted && xattr_is_trusted(xattr->name)) continue; + /* skip MAC labels; these are provided by LSM above */ + if (xattr_is_maclabel(xattr->name)) + continue; + err = xattr_list_one(&buffer, &remaining_size, xattr->name); if (err) break; diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index fffd6fffdce0f0..ae0ca68584963a 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -3,7 +3,7 @@ config XFS_FS tristate "XFS filesystem support" depends on BLOCK select EXPORTFS - select LIBCRC32C + select CRC32 select FS_IOMAP help XFS is a high performance journaling filesystem which originated diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 26a04a78348967..63151feb9c3fd5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -436,6 +436,25 @@ xfs_map_blocks( return 0; } +static bool 
+xfs_ioend_needs_wq_completion( + struct iomap_ioend *ioend) +{ + /* Changing inode size requires a transaction. */ + if (xfs_ioend_is_append(ioend)) + return true; + + /* Extent manipulation requires a transaction. */ + if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)) + return true; + + /* Page cache invalidation cannot be done in irq context. */ + if (ioend->io_flags & IOMAP_IOEND_DONTCACHE) + return true; + + return false; +} + static int xfs_submit_ioend( struct iomap_writepage_ctx *wpc, @@ -460,8 +479,7 @@ xfs_submit_ioend( memalloc_nofs_restore(nofs_flag); /* send ioends that might require a transaction to the completion wq */ - if (xfs_ioend_is_append(ioend) || - (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))) + if (xfs_ioend_needs_wq_completion(ioend)) ioend->io_bio.bi_end_io = xfs_end_bio; if (status) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8e7f1b324b3bea..1a2b3f06fa717e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -105,6 +105,7 @@ xfs_buf_free( { unsigned int size = BBTOB(bp->b_length); + might_sleep(); trace_xfs_buf_free(bp, _RET_IP_); ASSERT(list_empty(&bp->b_lru)); diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c index b4ffd80b7cb632..dcbfa274e06dc6 100644 --- a/fs/xfs/xfs_buf_mem.c +++ b/fs/xfs/xfs_buf_mem.c @@ -165,7 +165,7 @@ xmbuf_map_backing_mem( folio_set_dirty(folio); folio_unlock(folio); - bp->b_addr = folio_address(folio); + bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos); return 0; } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index c1a306268ae439..94d0873bcd6289 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -167,6 +167,14 @@ xfs_discard_extents( return error; } +/* + * Care must be taken setting up the trim cursor as the perags may not have been + * initialised when the cursor is initialised. e.g. a clean mount which hasn't + * read in AGFs and the first operation run on the mounted fs is a trim. 
This + * can result in perag fields that aren't initialised until + * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for + * the free space search. + */ struct xfs_trim_cur { xfs_agblock_t start; xfs_extlen_t count; @@ -204,6 +212,14 @@ xfs_trim_gather_extents( if (error) goto out_trans_cancel; + /* + * First time through tcur->count will not have been initialised as + * pag->pagf_longest is not guaranteed to be valid before we read + * the AGF buffer above. + */ + if (!tcur->count) + tcur->count = pag->pagf_longest; + if (tcur->by_bno) { /* sub-AG discard request always starts at tcur->start */ cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); @@ -350,7 +366,6 @@ xfs_trim_perag_extents( { struct xfs_trim_cur tcur = { .start = start, - .count = pag->pagf_longest, .end = end, .minlen = minlen, }; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index edbc521870a10a..b4e32f0860b7e6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1186,9 +1186,8 @@ xfs_qm_dqflush_done( if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && (lip->li_lsn == qlip->qli_flush_lsn || test_bit(XFS_LI_FAILED, &lip->li_flags))) { - spin_lock(&ailp->ail_lock); - xfs_clear_li_failed(lip); + clear_bit(XFS_LI_FAILED, &lip->li_flags); if (lip->li_lsn == qlip->qli_flush_lsn) { /* xfs_ail_update_finish() drops the AIL lock */ tail_lsn = xfs_ail_delete_one(ailp, lip); diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index a4bc1642fe5615..414b27a8645886 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -876,6 +876,7 @@ xfs_getfsmap_rtdev_rmapbt( const struct xfs_fsmap *keys, struct xfs_getfsmap_info *info) { + struct xfs_fsmap key0 = *keys; /* struct copy */ struct xfs_mount *mp = tp->t_mountp; struct xfs_rtgroup *rtg = NULL; struct xfs_btree_cur *bt_cur = NULL; @@ -887,32 +888,46 @@ xfs_getfsmap_rtdev_rmapbt( int error = 0; eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart + mp->m_sb.sb_rblocks); - if (keys[0].fmr_physical >= eofs) + if (key0.fmr_physical >= 
eofs) return 0; + /* + * On zoned filesystems with an internal rt volume, the volume comes + * immediately after the end of the data volume. However, the + * xfs_rtblock_t address space is relative to the start of the data + * device, which means that the first @rtstart fsblocks do not actually + * point anywhere. If a fsmap query comes in with the low key starting + * below @rtstart, report it as "owned by filesystem". + */ rtstart_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart); - if (keys[0].fmr_physical < rtstart_daddr) { + if (xfs_has_zoned(mp) && key0.fmr_physical < rtstart_daddr) { struct xfs_fsmap_irec frec = { .owner = XFS_RMAP_OWN_FS, .len_daddr = rtstart_daddr, }; - /* Adjust the low key if we are continuing from where we left off. */ - if (keys[0].fmr_length > 0) { - info->low_daddr = keys[0].fmr_physical + keys[0].fmr_length; - return 0; + /* + * Adjust the start of the query range if we're picking up from + * a previous round, and only emit the record if we haven't + * already gone past. + */ + key0.fmr_physical += key0.fmr_length; + if (key0.fmr_physical < rtstart_daddr) { + error = xfs_getfsmap_helper(tp, info, &frec); + if (error) + return error; + + key0.fmr_physical = rtstart_daddr; } - /* Fabricate an rmap entry for space occupied by the data dev */ - error = xfs_getfsmap_helper(tp, info, &frec); - if (error) - return error; + /* Zero the other fields to avoid further adjustments. */ + key0.fmr_owner = 0; + key0.fmr_offset = 0; + key0.fmr_length = 0; } - start_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + keys[0].fmr_physical); - end_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + - min(eofs - 1, keys[1].fmr_physical)); - + start_rtb = xfs_daddr_to_rtb(mp, key0.fmr_physical); + end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical)); info->missing_owner = XFS_FMR_OWN_FREE; /* @@ -920,12 +935,12 @@ xfs_getfsmap_rtdev_rmapbt( * low to the fsmap low key and max out the high key to the end * of the rtgroup. 
*/ - info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); - error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, key0.fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, &key0); if (error) return error; - info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); - xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, key0.fmr_length); + xfs_getfsmap_set_irec_flags(&info->low, &key0); /* Adjust the low key if we are continuing from where we left off. */ if (info->low.rm_blockcount == 0) { diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 40fc1bf900af90..c6cb0b6b9e4605 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -1089,13 +1089,7 @@ xfs_iflush_abort( * state. Whilst the inode is in the AIL, it should have a valid buffer * pointer for push operations to access - it is only safe to remove the * inode from the buffer once it has been removed from the AIL. - * - * We also clear the failed bit before removing the item from the AIL - * as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer - * references the inode item owns and needs to hold until we've fully - * aborted the inode log item and detached it from the buffer. */ - clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags); xfs_trans_ail_delete(&iip->ili_item, 0); /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6493bdb5735108..980aabc495128e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2888,7 +2888,7 @@ xlog_force_and_check_iclog( * * 1. the current iclog is active and has no data; the previous iclog * is in the active or dirty state. - * 2. the current iclog is drity, and the previous iclog is in the + * 2. the current iclog is dirty, and the previous iclog is in the * active or dirty state. 
* * We may sleep if: diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 799b84220ebb84..e5192c12e7acf8 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -229,6 +229,7 @@ typedef struct xfs_mount { bool m_finobt_nores; /* no per-AG finobt resv. */ bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; + unsigned int m_zonegc_low_space; /* * Bitsets of per-fs metadata that have been checked and/or are sick. diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b2dd0c0bf50979..4a11ddccc563ad 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1149,7 +1149,7 @@ xfs_init_percpu_counters( return 0; free_freecounters: - while (--i > 0) + while (--i >= 0) percpu_counter_destroy(&mp->m_free[i].count); percpu_counter_destroy(&mp->m_delalloc_rtextents); free_delalloc: @@ -2114,6 +2114,21 @@ xfs_fs_reconfigure( if (error) return error; + /* attr2 -> noattr2 */ + if (xfs_has_noattr2(new_mp)) { + if (xfs_has_crc(mp)) { + xfs_warn(mp, + "attr2 is always enabled for a V5 filesystem - can't be changed."); + return -EINVAL; + } + mp->m_features &= ~XFS_FEAT_ATTR2; + mp->m_features |= XFS_FEAT_NOATTR2; + } else if (xfs_has_attr2(new_mp)) { + /* noattr2 -> attr2 */ + mp->m_features &= ~XFS_FEAT_NOATTR2; + mp->m_features |= XFS_FEAT_ATTR2; + } + /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; @@ -2126,6 +2141,17 @@ xfs_fs_reconfigure( mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } + /* + * Now that mp has been modified according to the remount options, + * we do a final option validation with xfs_finish_flags(), just + * like it is done during mount. We cannot use + * xfs_finish_flags() on new_mp as it contains only the user + * given options.
+ */ + error = xfs_finish_flags(mp); + if (error) + return error; + /* ro -> rw */ if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index b7e82d85f04371..7a5c5ef2db928d 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -718,8 +718,40 @@ max_open_zones_show( } XFS_SYSFS_ATTR_RO(max_open_zones); +static ssize_t +zonegc_low_space_store( + struct kobject *kobj, + const char *buf, + size_t count) +{ + int ret; + unsigned int val; + + ret = kstrtouint(buf, 0, &val); + if (ret) + return ret; + + if (val > 100) + return -EINVAL; + + zoned_to_mp(kobj)->m_zonegc_low_space = val; + + return count; +} + +static ssize_t +zonegc_low_space_show( + struct kobject *kobj, + char *buf) +{ + return sysfs_emit(buf, "%u\n", + zoned_to_mp(kobj)->m_zonegc_low_space); +} +XFS_SYSFS_ATTR_RW(zonegc_low_space); + static struct attribute *xfs_zoned_attrs[] = { ATTR_LIST(max_open_zones), + ATTR_LIST(zonegc_low_space), NULL, }; ATTRIBUTE_GROUPS(xfs_zoned); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fcb1828e598fb..67c328d23e4ae9 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -315,7 +315,7 @@ xfs_ail_splice( } /* - * Delete the given item from the AIL. Return a pointer to the item. + * Delete the given item from the AIL. */ static void xfs_ail_delete( @@ -777,26 +777,28 @@ xfs_ail_update_finish( } /* - * xfs_trans_ail_update - bulk AIL insertion operation. + * xfs_trans_ail_update_bulk - bulk AIL insertion operation. * - * @xfs_trans_ail_update takes an array of log items that all need to be + * @xfs_trans_ail_update_bulk takes an array of log items that all need to be * positioned at the same LSN in the AIL. If an item is not in the AIL, it will - * be added. Otherwise, it will be repositioned by removing it and re-adding - * it to the AIL. If we move the first item in the AIL, update the log tail to - * match the new minimum LSN in the AIL. + * be added. 
Otherwise, it will be repositioned by removing it and re-adding + * it to the AIL. * - * This function takes the AIL lock once to execute the update operations on - * all the items in the array, and as such should not be called with the AIL - * lock held. As a result, once we have the AIL lock, we need to check each log - * item LSN to confirm it needs to be moved forward in the AIL. + * If we move the first item in the AIL, update the log tail to match the new + * minimum LSN in the AIL. * - * To optimise the insert operation, we delete all the items from the AIL in - * the first pass, moving them into a temporary list, then splice the temporary - * list into the correct position in the AIL. This avoids needing to do an - * insert operation on every item. + * This function should be called with the AIL lock held. * - * This function must be called with the AIL lock held. The lock is dropped - * before returning. + * To optimise the insert operation, we add all items to a temporary list, then + * splice this list into the correct position in the AIL. + * + * Items that are already in the AIL are first deleted from their current + * location before being added to the temporary list. + * + * This avoids needing to do an insert operation on every item. + * + * The AIL lock is dropped by xfs_ail_update_finish() before returning to + * the caller. 
*/ void xfs_trans_ail_update_bulk( @@ -909,10 +911,9 @@ xfs_trans_ail_delete( return; } - /* xfs_ail_update_finish() drops the AIL lock */ - xfs_clear_li_failed(lip); + clear_bit(XFS_LI_FAILED, &lip->li_flags); tail_lsn = xfs_ail_delete_one(ailp, lip); - xfs_ail_update_finish(ailp, tail_lsn); + xfs_ail_update_finish(ailp, tail_lsn); /* drops the AIL lock */ } int diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index bd841df93021ff..f945f0450b16fd 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -167,32 +167,4 @@ xfs_trans_ail_copy_lsn( } #endif -static inline void -xfs_clear_li_failed( - struct xfs_log_item *lip) -{ - struct xfs_buf *bp = lip->li_buf; - - ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); - lockdep_assert_held(&lip->li_ailp->ail_lock); - - if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) { - lip->li_buf = NULL; - xfs_buf_rele(bp); - } -} - -static inline void -xfs_set_li_failed( - struct xfs_log_item *lip, - struct xfs_buf *bp) -{ - lockdep_assert_held(&lip->li_ailp->ail_lock); - - if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) { - xfs_buf_hold(bp); - lip->li_buf = bp; - } -} - #endif /* __XFS_TRANS_PRIV_H__ */ diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 52af234936a238..d509e49b2aaae5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1201,6 +1201,13 @@ xfs_mount_zones( xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, iz.available + iz.reclaimable); + /* + * The user may configure GC to free up a percentage of unused blocks. + * By default this is 0. GC will always trigger at the minimum level + * for keeping max_open_zones available for data placement. 
+ */ + mp->m_zonegc_low_space = 0; + error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index c5136ea9bb1ddd..d613a4094db65c 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -162,18 +162,36 @@ struct xfs_zone_gc_data { /* * We aim to keep enough zones free in stock to fully use the open zone limit - * for data placement purposes. + * for data placement purposes. Additionally, the m_zonegc_low_space tunable + * can be set to make sure a fraction of the unused blocks are available for + * writing. */ bool xfs_zoned_need_gc( struct xfs_mount *mp) { + s64 available, free, threshold; + s32 remainder; + if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE)) return false; - if (xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE) < + + available = xfs_estimate_freecounter(mp, XC_FREE_RTAVAILABLE); + + if (available < mp->m_groups[XG_TYPE_RTG].blocks * (mp->m_max_open_zones - XFS_OPEN_GC_ZONES)) return true; + + free = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS); + + threshold = div_s64_rem(free, 100, &remainder); + threshold = threshold * mp->m_zonegc_low_space + + remainder * div_s64(mp->m_zonegc_low_space, 100); + + if (available < threshold) + return true; + return false; } @@ -789,7 +807,8 @@ xfs_zone_gc_write_chunk( { struct xfs_zone_gc_data *data = chunk->data; struct xfs_mount *mp = chunk->ip->i_mount; - unsigned int folio_offset = chunk->bio.bi_io_vec->bv_offset; + phys_addr_t bvec_paddr = + bvec_phys(bio_first_bvec_all(&chunk->bio)); struct xfs_gc_bio *split_chunk; if (chunk->bio.bi_status) @@ -804,7 +823,7 @@ xfs_zone_gc_write_chunk( bio_reset(&chunk->bio, mp->m_rtdev_targp->bt_bdev, REQ_OP_WRITE); bio_add_folio_nofail(&chunk->bio, chunk->scratch->folio, chunk->len, - folio_offset); + offset_in_folio(chunk->scratch->folio, bvec_paddr)); while ((split_chunk = xfs_zone_gc_split_write(data, chunk))) xfs_zone_gc_submit_write(data, split_chunk); diff --git 
a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da494f..74cc61e3ab09b1 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,12 +66,12 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ - char oem_id[ACPI_OEM_ID_SIZE]; /* ASCII OEM identification */ - char oem_table_id[ACPI_OEM_TABLE_ID_SIZE]; /* ASCII OEM table identification */ + char oem_id[ACPI_OEM_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM identification */ + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE] ACPI_NONSTRING; /* ASCII OEM table identification */ u32 oem_revision; /* OEM revision number */ char asl_compiler_id[ACPI_NAMESEG_SIZE]; /* ASCII ASL compiler vendor ID */ u32 asl_compiler_revision; /* ASL compiler version */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 80767e8bf3ad43..f7b3c4a4b7e7c3 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -1327,4 +1327,8 @@ typedef enum { #define ACPI_FLEX_ARRAY(TYPE, NAME) TYPE NAME[0] #endif +#ifndef ACPI_NONSTRING +#define ACPI_NONSTRING /* No terminating NUL character */ +#endif + #endif /* __ACTYPES_H__ */ diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index 04b4bf62051707..68e9379623e6dc 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -72,4 +72,12 @@ TYPE NAME[]; \ } +/* + * Explicitly mark strings that lack a terminating NUL character so + * that ACPICA can be built with -Wunterminated-string-initialization. 
+ */ +#if __has_attribute(__nonstring__) +#define ACPI_NONSTRING __attribute__((__nonstring__)) +#endif + #endif /* __ACGCC_H__ */ diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 2aa83ee0ec989b..a67988316d06b8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -10,6 +10,7 @@ #include #include +#include #include /* Set this bit for virtual address instead of SG list. */ @@ -581,7 +582,10 @@ static inline struct ahash_request *ahash_request_alloc_noprof( * ahash_request_free() - zeroize and free the request data structure * @req: request data structure cipher handle to be freed */ -void ahash_request_free(struct ahash_request *req); +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree_sensitive(req); +} static inline struct ahash_request *ahash_request_cast( struct crypto_async_request *req) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 485e22cf517efc..052ac7924af3ed 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -249,7 +249,7 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) static inline bool ahash_request_chained(struct ahash_request *req) { - return crypto_request_chained(&req->base); + return false; } static inline bool ahash_request_isvirt(struct ahash_request *req) diff --git a/include/crypto/sig.h b/include/crypto/sig.h index 11024708c06929..fa6dafafab3f0d 100644 --- a/include/crypto/sig.h +++ b/include/crypto/sig.h @@ -128,7 +128,7 @@ static inline void crypto_free_sig(struct crypto_sig *tfm) /** * crypto_sig_keysize() - Get key size * - * Function returns the key size in bytes. + * Function returns the key size in bits. * Function assumes that the key is already set in the transformation. If this * function is called without a setkey or with a failed setkey, you may end up * in a NULL dereference. 
diff --git a/include/cxl/features.h b/include/cxl/features.h index a3bb34694c06d8..5f7f842765a5f3 100644 --- a/include/cxl/features.h +++ b/include/cxl/features.h @@ -66,7 +66,7 @@ struct cxl_memdev; #ifdef CONFIG_CXL_FEATURES inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds); int devm_cxl_setup_features(struct cxl_dev_state *cxlds); -int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd); +int devm_cxl_setup_fwctl(struct device *host, struct cxl_memdev *cxlmd); #else static inline struct cxl_features_state *to_cxlfs(struct cxl_dev_state *cxlds) { @@ -78,7 +78,8 @@ static inline int devm_cxl_setup_features(struct cxl_dev_state *cxlds) return -EOPNOTSUPP; } -static inline int devm_cxl_setup_fwctl(struct cxl_memdev *cxlmd) +static inline int devm_cxl_setup_fwctl(struct device *host, + struct cxl_memdev *cxlmd) { return -EOPNOTSUPP; } diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index 2bf893eabb4b23..bcd54020d6ba51 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje */ static inline bool drm_gem_is_imported(const struct drm_gem_object *obj) { - /* The dma-buf's priv field points to the original GEM object. */ - return obj->dma_buf && (obj->dma_buf->priv != obj); + return !!obj->import_attach; } #ifdef CONFIG_LOCKDEP diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index df120b4d1f836c..eaf704d3d05e8b 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops { * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. 
* @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies */ struct drm_gpusvm_devmem { struct device *dev; @@ -97,6 +98,7 @@ struct drm_gpusvm_devmem { const struct drm_gpusvm_devmem_ops *ops; struct drm_pagemap *dpagemap; size_t size; + u64 timeslice_expiration; }; /** @@ -185,6 +187,31 @@ struct drm_gpusvm_notifier { } flags; }; +/** + * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags + * + * @migrate_devmem: Flag indicating whether the range can be migrated to device memory + * @unmapped: Flag indicating if the range has been unmapped + * @partial_unmap: Flag indicating if the range has been partially unmapped + * @has_devmem_pages: Flag indicating if the range has devmem pages + * @has_dma_mapping: Flag indicating if the range has a DMA mapping + * @__flags: Flags for range in u16 form (used for READ_ONCE) + */ +struct drm_gpusvm_range_flags { + union { + struct { + /* All flags below must be set upon creation */ + u16 migrate_devmem : 1; + /* All flags below must be set / cleared under notifier lock */ + u16 unmapped : 1; + u16 partial_unmap : 1; + u16 has_devmem_pages : 1; + u16 has_dma_mapping : 1; + }; + u16 __flags; + }; +}; + /** * struct drm_gpusvm_range - Structure representing a GPU SVM range * @@ -198,11 +225,6 @@ struct drm_gpusvm_notifier { * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping. * Note this is assuming only one drm_pagemap per range is allowed. 
* @flags: Flags for range - * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory - * @flags.unmapped: Flag indicating if the range has been unmapped - * @flags.partial_unmap: Flag indicating if the range has been partially unmapped - * @flags.has_devmem_pages: Flag indicating if the range has devmem pages - * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping * * This structure represents a GPU SVM range used for tracking memory ranges * mapped in a DRM device. @@ -216,15 +238,7 @@ struct drm_gpusvm_range { unsigned long notifier_seq; struct drm_pagemap_device_addr *dma_addr; struct drm_pagemap *dpagemap; - struct { - /* All flags below must be set upon creation */ - u16 migrate_devmem : 1; - /* All flags below must be set / cleared under notifier lock */ - u16 unmapped : 1; - u16 partial_unmap : 1; - u16 has_devmem_pages : 1; - u16 has_dma_mapping : 1; - } flags; + struct drm_gpusvm_range_flags flags; }; /** @@ -283,17 +297,22 @@ struct drm_gpusvm { * @check_pages_threshold: Check CPU pages for present if chunk is less than or * equal to threshold. If not present, reduce chunk * size. + * @timeslice_ms: The timeslice MS which in minimum time a piece of memory + * remains with either exclusive GPU or CPU access. * @in_notifier: entering from a MMU notifier * @read_only: operating on read-only memory * @devmem_possible: possible to use device memory + * @devmem_only: use only device memory * * Context that is DRM GPUSVM is operating in (i.e. user arguments). 
*/ struct drm_gpusvm_ctx { unsigned long check_pages_threshold; + unsigned long timeslice_ms; unsigned int in_notifier :1; unsigned int read_only :1; unsigned int devmem_possible :1; + unsigned int devmem_only :1; }; int drm_gpusvm_init(struct drm_gpusvm *gpusvm, diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h index 11d59ce0bac0bb..1c62d1d4458cae 100644 --- a/include/drm/drm_kunit_helpers.h +++ b/include/drm/drm_kunit_helpers.h @@ -118,6 +118,9 @@ drm_kunit_helper_create_crtc(struct kunit *test, const struct drm_crtc_funcs *funcs, const struct drm_crtc_helper_funcs *helper_funcs); +int drm_kunit_add_mode_destroy_action(struct kunit *test, + struct drm_display_mode *mode); + struct drm_display_mode * drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev, u8 video_code); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 50928a7ae98e3d..2aa1ef4a855757 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -71,6 +71,8 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_COUNT }; +struct drm_sched_entity_stats; + /** * struct drm_sched_entity - A wrapper around a job queue (typically * attached to the DRM file_priv). @@ -109,6 +111,8 @@ struct drm_sched_entity { */ struct drm_sched_rq *rq; + struct drm_sched_entity_stats *stats; + /** * @sched_list: * @@ -238,9 +242,7 @@ struct drm_sched_entity { /** * struct drm_sched_rq - queue of entities to be scheduled. * - * @sched: the scheduler to which this rq belongs to. - * @lock: protects @entities, @rb_tree_root and @current_entity. - * @current_entity: the entity which is to be scheduled. + * @lock: protects @entities, @rb_tree_root and @rr_deadline. * @entities: list of the entities to be scheduled. * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling * @@ -249,11 +251,9 @@ struct drm_sched_entity { * the next entity to emit commands from. 
*/ struct drm_sched_rq { - struct drm_gpu_scheduler *sched; - spinlock_t lock; /* Following members are protected by the @lock: */ - struct drm_sched_entity *current_entity; + ktime_t rr_deadline; struct list_head entities; struct rb_root_cached rb_tree_root; }; @@ -334,13 +334,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); struct drm_sched_job { u64 id; - /** - * @submit_ts: - * - * When the job was pushed into the entity queue. - */ - ktime_t submit_ts; - /** * @sched: * @@ -352,6 +345,7 @@ struct drm_sched_job { struct drm_sched_fence *s_fence; struct drm_sched_entity *entity; + struct drm_sched_entity_stats *entity_stats; enum drm_sched_priority s_priority; u32 credits; @@ -476,9 +470,7 @@ struct drm_sched_backend_ops { * @credit_count: the current credit count of this scheduler * @timeout: the time after which a job is removed from the scheduler. * @name: name of the ring for which this scheduler is being used. - * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, - * as there's usually one run-queue per priority, but could be less. - * @sched_rq: An allocated array of run-queues of size @num_rqs; + * @rq: Scheduler's run queue. * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler * waits on this wait queue until all the scheduled jobs are * finished. 
@@ -509,8 +501,7 @@ struct drm_gpu_scheduler { atomic_t credit_count; long timeout; const char *name; - u32 num_rqs; - struct drm_sched_rq **sched_rq; + struct drm_sched_rq rq; wait_queue_head_t job_scheduled; atomic64_t job_id_count; struct workqueue_struct *submit_wq; diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 4736ea525048e4..a7ce9523c50d37 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -850,6 +850,7 @@ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ MACRO__(0xE210, ## __VA_ARGS__), \ + MACRO__(0xE211, ## __VA_ARGS__), \ MACRO__(0xE212, ## __VA_ARGS__), \ MACRO__(0xE215, ## __VA_ARGS__), \ MACRO__(0xE216, ## __VA_ARGS__) @@ -860,6 +861,10 @@ MACRO__(0xB081, ## __VA_ARGS__), \ MACRO__(0xB082, ## __VA_ARGS__), \ MACRO__(0xB083, ## __VA_ARGS__), \ + MACRO__(0xB084, ## __VA_ARGS__), \ + MACRO__(0xB085, ## __VA_ARGS__), \ + MACRO__(0xB086, ## __VA_ARGS__), \ + MACRO__(0xB087, ## __VA_ARGS__), \ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b882746..c33cba111171fb 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,14 +9,12 @@ #include #include -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. 
@@ -45,8 +43,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f62..406437ad674bf1 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. 
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index abf0bd76e3703a..6f5976aca3e860 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -475,7 +475,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_CREATE_PORT 0x0095 #define HVCALL_CONNECT_PORT 0x0096 #define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a +#define HVCALL_GET_VP_INDEX_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0 diff --git a/include/kunit/clk.h b/include/kunit/clk.h index 0afae7688157b8..f226044cc78d11 100644 --- a/include/kunit/clk.h +++ b/include/kunit/clk.h @@ -6,6 +6,7 @@ struct clk; struct clk_hw; struct device; struct device_node; +struct of_phandle_args; struct kunit; struct clk * diff --git a/include/kunit/test.h b/include/kunit/test.h index 0ffb97c785663f..39c768f87dc9f4 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -67,7 +67,7 @@ enum kunit_status { /* * Speed Attribute is stored as an enum and separated into categories of - * speed: very_slowm, slow, and normal. These speeds are relative to + * speed: very_slow, slow, and normal. These speeds are relative to * other KUnit tests. * * Note: unset speed attribute acts as default of KUNIT_SPEED_NORMAL. 
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d60..8f7931eb7d164c 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b4a..f652a5028b5907 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. 
*/ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); -void __init sdei_init(void); +void __init acpi_sdei_init(void); void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } -static inline void sdei_init(void) { } +static inline void acpi_sdei_init(void) { } static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41dd..e721148c95d07d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); diff --git a/include/linux/bio.h b/include/linux/bio.h index cafc7c215de8be..b474a47ec7eefe 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; @@ -290,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e39c45bc0a97ee..9a1f0ee40b5661 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,23 +712,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int 
disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -736,11 +719,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -847,6 +825,51 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. 
+ */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); @@ -1614,6 +1637,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1670,10 +1694,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); @@ -1685,7 +1705,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); 
+void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1703,8 +1723,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9734544b6957cf..d1f02f8e3e55f4 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -356,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now. 
*/ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a4ad7839b62f..0029ff880e2748 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -222,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -397,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/bvec.h 
b/include/linux/bvec.h index 204b22a99c4ba6..0a80e1f9aa201c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a4555..50b14a5661c7a4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 485b651869d9b5..5bc8f55c8cca14 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -710,6 +710,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct 
cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28e999f2c64213..e7da3c3b098b3a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; } static inline void cgroup_get(struct cgroup *cgrp) diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b37085..0ee4c21c6dbc7c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + void (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d687..76e41805b92de9 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index d79a242b271d6e..cfcf6e4707ed94 100644 
--- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -723,7 +723,7 @@ coresight_find_output_type(struct coresight_platform_data *pdata, union coresight_dev_subtype subtype); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv); + struct platform_driver *pdev_drv, struct module *owner); void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e3049543008b9c..3aa955102b349a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 400fee6427a5c0..7a5b391dcc0176 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -776,8 +776,8 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -840,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) 
+static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -855,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -908,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -923,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq 
to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -993,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1008,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1017,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1032,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, 
relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bfef..e9f07e37dd6faa 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,65 +173,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. 
Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(14) - -#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. 
*/ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(18) - -#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 
<< 19) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(23) - -#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(25) -#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c4333c..85ab710ec0e724 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 65655a5d1be283..ca42d5e46ccc6b 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include #include +#include #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -53,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) /** * 
execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -93,9 +94,15 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); + +/* + * Called from mark_readonly(), where the system transitions to ROX. + */ +void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +static inline void execmem_cache_make_ro(void) { } #endif /** @@ -170,6 +177,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fc93f0abf513cd..25c4a5afbd4432 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -321,6 +324,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. 
diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 7db62fbc0500b0..31551e4cb8f34c 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,7 +61,6 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } -bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -178,20 +177,14 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) */ static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) { - long old, new; + long old; old = atomic_long_read(&ref->refcnt); - do { - if (unlikely(old < 0)) - return __file_ref_put_badval(ref, old); - - if (old == FILE_REF_ONEREF) - new = FILE_REF_DEAD; - else - new = old - 1; - } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); - - return new == FILE_REF_DEAD; + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); } /** diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index 4f87a908ab4f6b..ecd821ed8064f1 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -104,7 +104,6 @@ unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_w unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, unsigned int alg_id, int mem_type); -unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv); unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); diff --git a/include/linux/fs.h b/include/linux/fs.h index 
016b0fe1536e36..a4fd649e2c3fcd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2186,7 +2186,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607fb1..266e6c9e6f83ad 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6cd8d1d28b8be4..fc27b53c58c2e3 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -907,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct 
fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 6fa0a268d53827..097be89487bf5c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki */ diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 45b651c05b9c87..8adc8e9cb4a7b6 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -31,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. 
*/ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** diff --git a/include/linux/hid.h b/include/linux/hid.h index ef9a90ca0fbd6a..daae1d6d11a744 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -740,8 +740,9 @@ struct hid_descriptor { __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; + struct hid_class_descriptor rpt_desc; - struct hid_class_descriptor desc[1]; + struct hid_class_descriptor opt_descs[]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90ecf..c698f8415675ef 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, 
size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1adcba3ddd7660..1ef867bb8c44b0 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -345,7 +345,7 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; #endif - timer->function = function; + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f38..4861a7e304bbf4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 675959fb97ba9d..b52ac40d583095 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1002,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; 
#define lock_requestor(channel, flags) \ @@ -1161,13 +1167,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc24..7edc3fb0641cba 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -111,6 +111,8 @@ /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 +#define IEEE80211_S1G_BCN_CSSID 0x200 +#define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 @@ -153,9 +155,6 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 -/* bits unique to S1G beacon */ -#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 - /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -627,6 +626,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } +/** + * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * next TBTT field + */ +static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); +} + +/** + * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * ANO field + */ +static inline bool ieee80211_s1g_has_ano(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && 
+ (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO)); +} + +/** + * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * compressed SSID field + */ +static inline bool ieee80211_s1g_has_cssid(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); +} + /** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder @@ -1245,16 +1280,40 @@ struct ieee80211_ext { u8 change_seq; u8 variable[0]; } __packed s1g_beacon; - struct { - u8 sa[ETH_ALEN]; - __le32 timestamp; - u8 change_seq; - u8 next_tbtt[3]; - u8 variable[0]; - } __packed s1g_short_beacon; } u; } __packed __aligned(2); +/** + * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields + * @fc: frame control bytes in little-endian byteorder + * Return: total length in bytes of the optional fixed-length fields + * + * S1G beacons may contain up to three optional fixed-length fields that + * precede the variable-length elements. Whether these fields are present + * is indicated by flags in the frame control field. 
+ * + * From IEEE 802.11-2024 section 9.3.4.3: + * - Next TBTT field may be 0 or 3 bytes + * - Short SSID field may be 0 or 4 bytes + * - Access Network Options (ANO) field may be 0 or 1 byte + */ +static inline size_t +ieee80211_s1g_optional_len(__le16 fc) +{ + size_t len = 0; + + if (ieee80211_s1g_has_next_tbtt(fc)) + len += 3; + + if (ieee80211_s1g_has_cssid(fc)) + len += 4; + + if (ieee80211_s1g_has_ano(fc)) + len += 1; + + return len; +} + #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) @@ -1526,7 +1585,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 68416b135151d7..522644d62f30f0 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, #define IOMAP_IOEND_BOUNDARY (1U << 2) /* is direct I/O */ #define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ - (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) /* * Structure for writeback I/O completions. 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ccce8a751e2a55..4273871845eebb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -440,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -456,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. 
*/ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ @@ -750,6 +750,7 @@ struct iommu_domain_ops { * @dev: struct device for sysfs handling * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs + * @ready: set once iommu_device_register() has completed successfully */ struct iommu_device { struct list_head list; @@ -758,6 +759,7 @@ struct iommu_device { struct device *dev; struct iommu_group *singleton_group; u32 max_pasids; + bool ready; }; /** diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac9833..00000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. 
- * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5438a1b446a6b0..291d49b9bf0549 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2382,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 1a0bc35839e360..16a2ee4f8310b1 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -52,44 +52,23 @@ __local_unlock_irqrestore(lock, flags) /** - * localtry_lock_init - Runtime initialize a lock instance - */ -#define localtry_lock_init(lock) __localtry_lock_init(lock) - -/** - * localtry_lock - Acquire a per CPU local lock - * @lock: The lock variable - */ -#define localtry_lock(lock) __localtry_lock(lock) - -/** - * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts - * @lock: The lock variable - */ -#define localtry_lock_irq(lock) __localtry_lock_irq(lock) - -/** - * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable - * interrupts - * @lock: The lock variable - * @flags: Storage for interrupt flags + * local_lock_init - Runtime initialize a lock instance */ -#define localtry_lock_irqsave(lock, flags) \ - __localtry_lock_irqsave(lock, flags) +#define 
local_trylock_init(lock) __local_trylock_init(lock) /** - * localtry_trylock - Try to acquire a per CPU local lock. + * local_trylock - Try to acquire a per CPU local lock * @lock: The lock variable * * The function can be used in any context such as NMI or HARDIRQ. Due to * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock(lock) __localtry_trylock(lock) +#define local_trylock(lock) __local_trylock(lock) /** - * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable - * interrupts if acquired + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired * @lock: The lock variable * @flags: Storage for interrupt flags * @@ -97,29 +76,8 @@ * locking constrains it will _always_ fail to acquire the lock in NMI or * HARDIRQ context on PREEMPT_RT. */ -#define localtry_trylock_irqsave(lock, flags) \ - __localtry_trylock_irqsave(lock, flags) - -/** - * local_unlock - Release a per CPU local lock - * @lock: The lock variable - */ -#define localtry_unlock(lock) __localtry_unlock(lock) - -/** - * local_unlock_irq - Release a per CPU local lock and enable interrupts - * @lock: The lock variable - */ -#define localtry_unlock_irq(lock) __localtry_unlock_irq(lock) - -/** - * localtry_unlock_irqrestore - Release a per CPU local lock and restore - * interrupt flags - * @lock: The lock variable - * @flags: Interrupt flags to restore - */ -#define localtry_unlock_irqrestore(lock, flags) \ - __localtry_unlock_irqrestore(lock, flags) +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 67bd13d142fac3..8d5ac16a9b1794 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,10 +15,11 @@ typedef struct { 
#endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { local_lock_t llock; - unsigned int acquired; -} localtry_lock_t; + u8 acquired; +} local_trylock_t; #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -29,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } @@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -76,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -87,149 +94,117 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + __percpu local_lock_t *: (void)0); \ + local_lock_acquire(l); \ + } 
while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - preempt_enable(); \ + __local_lock_acquire(lock); \ } while (0) -#define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_enable(); \ - } while (0) - -#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) - -#define __local_lock_nested_bh(lock) \ - do { \ - lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) - -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) \ -do { \ - __local_lock_init(&(lock)->llock); \ - WRITE_ONCE((lock)->acquired, 0); \ -} while (0) - -#define __localtry_lock(lock) \ - do { \ - localtry_lock_t *lt; \ - preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irq(lock) \ - do { \ - localtry_lock_t *lt; \ - local_irq_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irqsave(lock, flags) \ - do { \ - localtry_lock_t *lt; \ - local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(<->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ - localtry_lock_t *lt; 
\ - bool _ret; \ + local_trylock_t *tl; \ \ preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(<->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_unlock(lock) \ +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + __percpu local_lock_t *: (void)0); \ + } while (0) + +#define __local_unlock(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) -#define __localtry_unlock_irq(lock) \ +#define __local_unlock_irq(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while 
(0) -#define __localtry_unlock_irqrestore(lock, flags) \ +#define __local_unlock_irqrestore(lock, flags) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(<->llock); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -237,16 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; -typedef spinlock_t localtry_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -283,17 +260,7 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) __local_lock_init(lock) -#define __localtry_lock(lock) __local_lock(lock) -#define __localtry_lock_irq(lock) __local_lock(lock) -#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) -#define __localtry_unlock(lock) __local_unlock(lock) -#define __localtry_unlock_irq(lock) __local_unlock(lock) -#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ int __locked; \ \ @@ -308,11 +275,11 @@ do { \ __locked; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ typecheck(unsigned long, flags); \ 
flags = 0; \ - __localtry_trylock(lock); \ + __local_trylock(lock); \ }) #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3c3deac57894ef..e43ff9f980a46b 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,10 +45,7 @@ struct mdio_device { unsigned int reset_deassert_delay; }; -static inline struct mdio_device *to_mdio_device(const struct device *dev) -{ - return container_of(dev, struct mdio_device, dev); -} +#define to_mdio_device(__dev) container_of_const(__dev, struct mdio_device, dev) /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc22..9af01bdd86d26d 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d1dfbad9a44730..e6ba8f4f4bd1f4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -398,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { diff --git a/include/linux/mm.h b/include/linux/mm.h index b7f13f087954bd..ece96e871a3b62 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -193,6 +193,14 @@ static inline void __mm_zero_struct_page(struct page *page) extern int sysctl_max_map_count; +extern bool sysctl_workingset_protection; +extern u8 sysctl_anon_min_ratio; +extern u8 sysctl_clean_low_ratio; +extern u8 sysctl_clean_min_ratio; +int vm_workingset_protection_update_handler( + const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + extern unsigned long 
sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; @@ -385,7 +393,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE @@ -1218,6 +1226,23 @@ static inline unsigned int folio_order(const struct folio *folio) return folio_large_order(folio); } +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif +} + #include /* @@ -4248,4 +4273,62 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); #define VM_SEALED_SYSMAP VM_NONE #endif +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer with any of the values we write into this + * field. + * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. 
On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. + * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. 
+ */ +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mman.h b/include/linux/mman.h index bce214fece16b9..f4c6346a8fcd29 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 25e80b2ca7f41a..6ccec1bf2896ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -967,6 +967,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ @@ -1499,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. 
diff --git a/include/linux/module.h b/include/linux/module.h index d94b196d5a34e1..8050f77c3b64f8 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -584,6 +586,11 @@ struct module { atomic_t refcnt; #endif +#ifdef CONFIG_MITIGATION_ITS + int its_num_pages; + void **its_page_array; +#endif + #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959e0..1a3136e53eaa07 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,52 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. 
- */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. 
+ */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b551b..0075f6e5c3da9d 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 1e748958dad404..311f145eb4e843 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -67,7 +67,7 @@ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1), \ - __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__))) + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_FAST_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf489..bbaf55fb3101f2 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry 
*user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817bb9..33338a233cc724 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1012,9 +1012,13 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); @@ -4429,6 +4433,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). 
*/ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4971,9 +4976,11 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a36a..1464b3a104989d 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -51,8 +51,7 @@ enum netfs_io_source { NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. 
@@ -207,6 +206,7 @@ enum netfs_io_origin { NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ @@ -223,9 +223,10 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ @@ -270,7 +271,7 @@ struct netfs_io_request { #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ @@ -279,6 +280,7 @@ struct netfs_io_request { #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ #define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ 
const struct netfs_request_ops *netfs_ops; @@ -439,15 +441,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb34..0906a0b40c6aa5 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 71319637a84e61..ee03f3cef30ca5 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -213,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define 
NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -236,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9aa8a43843d717..5c7c92659e736f 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -50,10 +50,6 @@ void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ -extern struct nfsd_file * -nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, - const struct cred *, const struct nfs_fh *, - const fmode_t) __must_hold(rcu); void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { @@ -64,10 +60,10 @@ struct nfsd_localio_operations { struct rpc_clnt *, const struct cred *, const struct nfs_fh *, + struct nfsd_file __rcu **pnf, const fmode_t); - struct net *(*nfsd_file_put_local)(struct nfsd_file *); - struct nfsd_file *(*nfsd_file_get)(struct nfsd_file *); - void (*nfsd_file_put)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); + struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ 
-77,6 +73,7 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, struct nfs_file_localio *, + struct nfsd_file __rcu **pnf, const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) @@ -91,16 +88,19 @@ static inline void nfs_to_nfsd_net_put(struct net *net) rcu_read_unlock(); } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio) { /* - * Must not hold RCU otherwise nfsd_file_put() can easily trigger: - * "Voluntary context switch within RCU read-side critical section!" - * by scheduling deep in underlying filesystem (e.g. XFS). + * Either *localio must be guaranteed to be non-NULL, or caller + * must prevent nfsd shutdown from completing as nfs_close_local_fh() + * does by blocking the nfs_uuid from being finally put. */ - struct net *net = nfs_to->nfsd_file_put_local(localio); + struct net *net; - nfs_to_nfsd_net_put(net); + net = nfs_to->nfsd_file_put_local(localio); + + if (net) + nfs_to_nfsd_net_put(net); } #else /* CONFIG_NFS_LOCALIO */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2479ed10f53e37..5d7afb6079f1d8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -2094,7 +2094,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, - NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_CMD_SIZE_LIM_EXCEEDED = 0x183, /* * I/O Command Set Specific - Fabrics commands: diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe8670c..89e9d604988351 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -389,24 +389,37 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. 
- * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer...) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u trailer; \ type *name = (type *)&name##_u +/** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + /** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. @@ -421,7 +434,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@name) to get compile-time size of it afterwards. 
*/ #define DEFINE_RAW_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) + __DEFINE_FLEX(type, name, member, count, = { }) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing @@ -438,6 +451,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@NAME) to get compile-time size of it afterwards. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcceef..3b814ce08331e2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 26baa78f1ca7d8..8fa2ff2682bce5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1341,7 +1341,7 @@ struct readahead_control { ._index = i, \ } -#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) +#define VM_READAHEAD_PAGES (SZ_8M / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 879d19cebd4fc6..749cee0bcf2cc0 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -114,6 +114,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address 
corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @aligned_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -121,6 +123,7 @@ struct pci_epf_bar { dma_addr_t phys_addr; void *addr; size_t size; + size_t aligned_size; enum pci_barno barno; int flags; }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e9671..081e5c0a3ddf4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { @@ -1848,6 +1850,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f09..85bf8dd9f08740 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/include/linux/perf_event.h 
b/include/linux/perf_event.h index 5a9bf15d44617b..0069ba6866a48b 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -823,7 +823,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index c740779778304a..8a7f4f802c5748 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -188,6 +188,13 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) return tag; } +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; +} + void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); void pgalloc_tag_swap(struct folio *new, struct folio *old); @@ -199,6 +206,7 @@ static inline void clear_page_tag_ref(struct page *page) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e2b705c149454a..b50447ef1c921c 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1511,8 +1511,9 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, /* * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page - * tables copied during copy_page_range(). On success, stores the pfn to be - * passed to untrack_pfn_copy(). + * tables copied during copy_page_range(). Will store the pfn to be + * passed to untrack_pfn_copy() only if there is something to be untracked. + * Callers should initialize the pfn to 0. 
*/ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, unsigned long *pfn) @@ -1522,7 +1523,9 @@ static inline int track_pfn_copy(struct vm_area_struct *dst_vma, /* * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during - * copy_page_range(), but after track_pfn_copy() was already called. + * copy_page_range(), but after track_pfn_copy() was already called. Can + * be called even if track_pfn_copy() did not actually track anything: + * handled internally. */ static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) diff --git a/include/linux/phy.h b/include/linux/phy.h index a2bfae80c44975..bef68f6af99a92 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -744,10 +744,7 @@ struct phy_device { #define PHY_F_NO_IRQ 0x80000000 #define PHY_F_RXC_ALWAYS_ON 0x40000000 -static inline struct phy_device *to_phy_device(const struct device *dev) -{ - return container_of(to_mdio_device(dev), struct phy_device, mdio); -} +#define to_phy_device(__dev) container_of_const(to_mdio_device(__dev), struct phy_device, mdio) /** * struct phy_tdr_config - Configuration of a TDR raw test diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 1f5773ab566010..30659b615fca8c 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -361,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. 
This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. + * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -385,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -397,9 +406,9 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. * - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. 
+ * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h index 6e603a8c075f90..1d34435b70016a 100644 --- a/include/linux/platform_data/x86/intel_pmc_ipc.h +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -36,6 +36,7 @@ struct pmc_ipc_rbuf { */ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) { +#ifdef CONFIG_ACPI struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object params[PMC_IPCS_PARAM_COUNT] = { {.type = ACPI_TYPE_INTEGER,}, @@ -89,6 +90,9 @@ static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf } return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ } #endif /* INTEL_PMC_IPC_H */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7fb5a459847ef3..756b842dcd3091 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. 
@@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa874..8ca2235f78d5d9 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) /********** net/core/skbuff.c **********/ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ccaaf4c7d5f6a4..ea39dd23a1976e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 493d9de4e47241..dc6ebaee3d18b2 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). 
*/ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 0ba5e49bace42c..6e64f0193777b0 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -249,10 +249,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b1975..785048a3b3e604 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. 
* @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 6c3125300c009a..a3d8305e88a51e 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -437,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. 
+ */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index a0bb6d01213780..93129fea552eaa 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -168,6 +168,8 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns, #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -201,6 +203,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4d16c13d0df580..64cb4b04be7add 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -220,6 +220,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * occurs. * @reset_done: optional function to call after transport specific reset * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. 
*/ struct virtio_driver { struct device_driver driver; @@ -237,6 +239,7 @@ struct virtio_driver { int (*restore)(struct virtio_device *dev); int (*reset_prepare)(struct virtio_device *dev); int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0387d64e2c66c6..36fb3edfa403d9 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -140,6 +140,7 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; + u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e393..5ca8d4dd149d4e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { diff --git a/include/linux/wait.h b/include/linux/wait.h index 965a19809c7e56..3d442267a2565c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1195,6 +1196,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, 
struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a8586c3058c7cd..521a9d0acac692 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -557,7 +557,8 @@ enum { #define ESCO_LINK 0x02 /* Low Energy links do not have defined link type. Use invented one */ #define LE_LINK 0x80 -#define ISO_LINK 0x82 +#define CIS_LINK 0x82 +#define BIS_LINK 0x83 #define INVALID_LINK 0xff /* LMP features */ @@ -1931,6 +1932,8 @@ struct hci_cp_le_pa_create_sync { __u8 sync_cte_type; } __packed; +#define HCI_OP_LE_PA_CREATE_SYNC_CANCEL 0x2045 + #define HCI_OP_LE_PA_TERM_SYNC 0x2046 struct hci_cp_le_pa_term_sync { __le16 handle; @@ -2830,7 +2833,7 @@ struct hci_evt_le_create_big_complete { __le16 bis_handle[]; } __packed; -#define HCI_EVT_LE_BIG_SYNC_ESTABILISHED 0x1d +#define HCI_EVT_LE_BIG_SYNC_ESTABLISHED 0x1d struct hci_evt_le_big_sync_estabilished { __u8 status; __u8 handle; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 5115da34f88128..d15316bffd70bb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -545,6 +545,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; @@ -996,7 +997,8 @@ static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num++; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num++; break; } @@ 
-1022,7 +1024,8 @@ static inline void hci_conn_hash_del(struct hci_dev *hdev, struct hci_conn *c) case ESCO_LINK: h->sco_num--; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: h->iso_num--; break; } @@ -1039,7 +1042,8 @@ static inline unsigned int hci_conn_num(struct hci_dev *hdev, __u8 type) case SCO_LINK: case ESCO_LINK: return h->sco_num; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return h->iso_num; default: return 0; @@ -1100,7 +1104,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK) continue; if (c->iso_qos.bcast.bis == bis) { @@ -1113,10 +1117,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_bis(struct hci_dev *hdev, return NULL; } -static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, - __u8 sid, - bdaddr_t *dst, - __u8 dst_type) +static inline struct hci_conn * +hci_conn_hash_lookup_create_pa_sync(struct hci_dev *hdev) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; @@ -1124,8 +1126,10 @@ static inline struct hci_conn *hci_conn_hash_lookup_sid(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || bacmp(&c->dst, dst) || - c->dst_type != dst_type || c->sid != sid) + if (c->type != BIS_LINK) + continue; + + if (!test_bit(HCI_CONN_CREATE_PA_SYNC, &c->flags)) continue; rcu_read_unlock(); @@ -1148,8 +1152,8 @@ hci_conn_hash_lookup_per_adv_bis(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, ba) || c->type != ISO_LINK || - !test_bit(HCI_CONN_PER_ADV, &c->flags)) + if (bacmp(&c->dst, ba) || c->type != BIS_LINK || + !test_bit(HCI_CONN_PER_ADV, &c->flags)) continue; if (c->iso_qos.bcast.big == big && @@ -1238,7 +1242,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, 
rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; /* Match CIG ID if set */ @@ -1270,7 +1274,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_cig(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || !bacmp(&c->dst, BDADDR_ANY)) + if (c->type != CIS_LINK) continue; if (handle == c->iso_qos.ucast.cig) { @@ -1293,17 +1297,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) - continue; - - /* An ISO_LINK hcon with BDADDR_ANY as destination - * address is a Broadcast connection. A Broadcast - * slave connection is associated with a PA train, - * so the sync_handle can be used to differentiate - * from unicast. - */ - if (bacmp(&c->dst, BDADDR_ANY) && - c->sync_handle == HCI_SYNC_HANDLE_INVALID) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big) { @@ -1327,7 +1321,7 @@ hci_conn_hash_lookup_big_sync_pend(struct hci_dev *hdev, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; if (handle == c->iso_qos.bcast.big && num_bis == c->num_bis) { @@ -1350,8 +1344,8 @@ hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK || - c->state != state) + if (c->type != BIS_LINK || bacmp(&c->dst, BDADDR_ANY) || + c->state != state) continue; if (handle == c->iso_qos.bcast.big) { @@ -1374,8 +1368,8 @@ hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK || - !test_bit(HCI_CONN_PA_SYNC, &c->flags)) + if (c->type != BIS_LINK || + !test_bit(HCI_CONN_PA_SYNC, 
&c->flags)) continue; if (c->iso_qos.bcast.big == big) { @@ -1397,7 +1391,7 @@ hci_conn_hash_lookup_pa_sync_handle(struct hci_dev *hdev, __u16 sync_handle) rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != ISO_LINK) + if (c->type != BIS_LINK) continue; /* Ignore the listen hcon, we are looking @@ -1524,8 +1518,6 @@ bool hci_setup_sync(struct hci_conn *conn, __u16 handle); void hci_sco_setup(struct hci_conn *conn, __u8 status); bool hci_iso_setup_path(struct hci_conn *conn); int hci_le_create_cis_pending(struct hci_dev *hdev); -int hci_pa_create_sync_pending(struct hci_dev *hdev); -int hci_le_big_create_sync_pending(struct hci_dev *hdev); int hci_conn_check_create_cis(struct hci_conn *conn); struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, @@ -1566,9 +1558,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]); +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]); int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, @@ -1800,6 +1792,7 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); +u8 *hci_conn_key_enc_size(struct hci_conn *conn); struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr); struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, @@ -2013,7 +2006,8 @@ static 
inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, case ESCO_LINK: return sco_connect_ind(hdev, bdaddr, flags); - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: return iso_connect_ind(hdev, bdaddr, flags); default: @@ -2404,7 +2398,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 7e2cf0cca939a1..72558c826aa1b4 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -185,3 +185,6 @@ int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, struct hci_conn_params *params); + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/include/net/checksum.h b/include/net/checksum.h index 243f972267b8d1..be9356d4b67a1e 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -164,7 +164,7 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr); + __wsum diff, bool pseudohdr, bool ipv6); static __always_inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 5927910ec06e55..6e68e359ad18c2 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ 
-45,6 +45,8 @@ struct fib_rule { struct fib_rule_port_range dport_range; u16 sport_mask; u16 dport_mask; + u8 iif_is_l3_master; + u8 oif_is_l3_master; struct rcu_head rcu; }; diff --git a/include/net/flow.h b/include/net/flow.h index 335bbc52171c10..2a3f0c42f09250 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,6 +38,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_L3MDEV_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f7fe796e8429a5..1eb8dad18f7e35 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && + fl->flowi_l3mdev == iifindex; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && + fl->flowi_l3mdev == oifindex; +} + void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); @@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } + +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return false; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return false; +} + static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index c316b551df8d61..0ee5bc7668103b 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,6 +98,9 @@ static inline int netdev_lock_cmp_fn(const 
struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +#define netdev_lock_dereference(p, dev) \ + rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock)) + int netdev_debug_event(struct notifier_block *nb, unsigned long event, void *ptr); diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 825141d675e583..d01c82983b4d6d 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -103,6 +103,12 @@ struct netdev_stat_ops { struct netdev_queue_stats_tx *tx); }; +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum); + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 6e202ed5e63f3c..7370fba844efcf 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -2,6 +2,7 @@ #ifndef _NFT_FIB_H_ #define _NFT_FIB_H_ +#include #include struct nft_fib { @@ -39,6 +40,14 @@ static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt) return nft_fib_is_loopback(pkt->skb, indev); } +static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt, + const struct net_device *iif) +{ + const struct net_device *dev = iif ? 
iif : pkt->skb->dev; + + return l3mdev_master_ifindex_rcu(dev); +} + int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 36eb57d73abc6c..431b593de70937 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA @@ -33,6 +34,9 @@ #define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM) +/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */ +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1) + /* * Fast allocation side cache array/stack * @@ -221,6 +225,8 @@ struct page_pool { void *mp_priv; const struct memory_provider_ops *mp_ops; + struct xarray dma_mapped; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d48c657191cd01..1c05fed05f2bc2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1031,6 +1031,21 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) return skb; } +static inline struct sk_buff *qdisc_dequeue_internal(struct Qdisc *sch, bool direct) +{ + struct sk_buff *skb; + + skb = __skb_dequeue(&sch->gso_skb); + if (skb) { + sch->q.qlen--; + return skb; + } + if (direct) + return __qdisc_dequeue_head(&sch->q); + else + return sch->dequeue(sch); +} + static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 31248cfdfb235f..dcd288fa1bb6fb 100644 --- 
a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,6 +775,7 @@ struct sctp_transport { /* Reference counting. */ refcount_t refcnt; + __u32 dead:1, /* RTO-Pending : A flag used to track if one of the DATA * chunks sent to this address is currently being * used to compute a RTT. If this flag is 0, @@ -784,7 +785,7 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - __u32 rto_pending:1, + rto_pending:1, /* * hb_sent : a flag that signals that we have a pending diff --git a/include/net/sock.h b/include/net/sock.h index 8daf1b3b12c607..99470c6d24de8b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -339,6 +339,8 @@ struct sk_filter; * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. + * @sk_owner: reference to the real owner of the socket that calls + * sock_lock_init_class_and_name(). */ struct sock { /* @@ -547,6 +549,10 @@ struct sock { struct rcu_head sk_rcu; netns_tracker ns_tracker; struct xarray sk_user_frags; + +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) + struct module *sk_owner; +#endif }; struct sock_bh_locked { @@ -1583,6 +1589,35 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) sk_mem_reclaim(sk); } +#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES) +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ + __module_get(owner); + sk->sk_owner = owner; +} + +static inline void sk_owner_clear(struct sock *sk) +{ + sk->sk_owner = NULL; +} + +static inline void sk_owner_put(struct sock *sk) +{ + module_put(sk->sk_owner); +} +#else +static inline void sk_owner_set(struct sock *sk, struct module *owner) +{ +} + +static inline void sk_owner_clear(struct sock *sk) +{ +} + +static inline void sk_owner_put(struct sock *sk) +{ +} +#endif /* * Macro so as to not evaluate some arguments when * lockdep is not 
enabled. @@ -1592,13 +1627,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ + sk_owner_set(sk, THIS_MODULE); \ sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ - sizeof((sk)->sk_lock)); \ + sizeof((sk)->sk_lock)); \ lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ - (skey), (sname)); \ + (skey), (sname)); \ lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ } while (0) @@ -2942,8 +2978,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index a58ae7589d1212..e8bd6ddb7b1275 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -71,9 +71,6 @@ struct xdp_sock { */ u32 tx_budget_spent; - /* Protects generic receive. */ - spinlock_t rx_lock; - /* Statistics */ u64 rx_dropped; u64 rx_queue_full; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 39365fd2ea175c..1f1861c57e2ad0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -147,8 +147,19 @@ enum { }; struct xfrm_dev_offload { + /* The device for this offload. + * Device drivers should not use this directly, as that will prevent + * them from working with bonding device. Instead, the device passed + * to the add/delete callbacks should be used. + */ struct net_device *dev; netdevice_tracker dev_tracker; + /* This is a private pointer used by the bonding driver (and eventually + * should be moved there). Device drivers should not use it. 
+ * Protected by xfrm_state.lock AND bond.ipsec_lock in most cases, + * except in the .xdo_dev_state_del() flow, where only xfrm_state.lock + * is held. + */ struct net_device *real_dev; unsigned long offload_handle; u8 dir : 2; @@ -236,7 +247,6 @@ struct xfrm_state { /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - struct sock __rcu *encap_sk; /* NAT keepalive */ u32 nat_keepalive_interval; /* seconds */ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1dcd4d71468a5f..cac56e6b0869b4 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -53,6 +53,8 @@ struct xsk_buff_pool { refcount_t users; struct xdp_umem *umem; struct work_struct work; + /* Protects generic receive in shared and non-shared umem mode. */ + spinlock_t rx_lock; struct list_head free_list; struct list_head xskb_list; u32 heads_cnt; @@ -238,8 +240,8 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb, return orig_addr; offset = xskb->xdp.data - xskb->xdp.data_hard_start; - orig_addr -= offset; offset += pool->headroom; + orig_addr -= offset; return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a822..901353796fbbf6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index b098ceadbe74bf..9a70048adbc069 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h 
@@ -223,7 +223,7 @@ struct hdac_driver { struct device_driver driver; int type; const struct hda_device_id *id_table; - int (*match)(struct hdac_device *dev, struct hdac_driver *drv); + int (*match)(struct hdac_device *dev, const struct hdac_driver *drv); void (*unsol_event)(struct hdac_device *dev, unsigned int event); /* fields used by ext bus APIs */ @@ -235,7 +235,7 @@ struct hdac_driver { #define drv_to_hdac_driver(_drv) container_of(_drv, struct hdac_driver, driver) const struct hda_device_id * -hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv); +hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv); /* * Bus verb operators diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 4c7a40e149a594..7de390022ac268 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -22,6 +22,7 @@ void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *chip, bool enable); void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *chip, bool enable); int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus); +struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_addr(struct hdac_bus *bus, int addr); struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_name(struct hdac_bus *bus, const char *codec_name); @@ -97,12 +98,17 @@ struct hdac_ext_link { void __iomem *ml_addr; /* link output stream reg pointer */ u32 lcaps; /* link capablities */ u16 lsdiid; /* link sdi identifier */ + u32 id; + u8 slcount; int ref_count; struct list_head list; }; +#define hdac_ext_link_alt(link) ((link)->lcaps & AZX_ML_HDA_LCAP_ALT) +#define hdac_ext_link_ofls(link) ((link)->lcaps & AZX_ML_HDA_LCAP_OFLS) + int snd_hdac_ext_bus_link_power_up(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_down(struct hdac_ext_link *hlink); int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus); diff --git a/include/sound/pcm.h 
b/include/sound/pcm.h index 8becb450488736..8582d22f381847 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -1404,6 +1404,8 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s #define snd_pcm_lib_mmap_iomem NULL #endif +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime); + /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 36a4a1e1d8ca28..d8bd5d37131aa0 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -226,6 +226,7 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, bool playback); int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai); +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index d099ae27f8491a..682499b871eac4 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -19,7 +19,7 @@ struct ump_cvt_to_ump_bank { /* context for converting from MIDI1 byte stream to UMP packet */ struct ump_cvt_to_ump { /* MIDI1 intermediate buffer */ - unsigned char buf[4]; + unsigned char buf[6]; /* up to 6 bytes for SysEx */ int len; int cmd_bytes; diff --git a/include/sound/ump_msg.h b/include/sound/ump_msg.h index 72f60ddfea7535..9556b4755a1ed8 100644 --- a/include/sound/ump_msg.h +++ b/include/sound/ump_msg.h @@ -604,7 +604,7 @@ struct snd_ump_stream_msg_ep_info { } __packed; /* UMP Stream Message: Device Info Notification (128bit) */ -struct snd_ump_stream_msg_devince_info { +struct snd_ump_stream_msg_device_info { #ifdef __BIG_ENDIAN_BITFIELD /* 0 */ u32 type:4; @@ -754,7 +754,7 @@ struct snd_ump_stream_msg_fb_name { union snd_ump_stream_msg { struct snd_ump_stream_msg_ep_discovery ep_discovery; 
struct snd_ump_stream_msg_ep_info ep_info; - struct snd_ump_stream_msg_devince_info device_info; + struct snd_ump_stream_msg_device_info device_info; struct snd_ump_stream_msg_stream_cfg stream_cfg; struct snd_ump_stream_msg_fb_discovery fb_discovery; struct snd_ump_stream_msg_fb_info fb_info; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 549ab3b4196180..3efc00cc1bcd29 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -1928,7 +1928,7 @@ DECLARE_EVENT_CLASS(btrfs__prelim_ref, TP_PROTO(const struct btrfs_fs_info *fs_info, const struct prelim_ref *oldref, const struct prelim_ref *newref, u64 tree_size), - TP_ARGS(fs_info, newref, oldref, tree_size), + TP_ARGS(fs_info, oldref, newref, tree_size), TP_STRUCT__entry_btrfs( __field( u64, root_id ) diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695ed..4175eec40048ad 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -30,6 +30,7 @@ EM(netfs_write_trace_dio_write, "DIO-WRITE") \ EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ EM(netfs_write_trace_writeback, "WRITEBACK") \ + EM(netfs_write_trace_writeback_single, "WB-SINGLE") \ E_(netfs_write_trace_writethrough, "WRITETHRU") #define netfs_rreq_origins \ @@ -38,6 +39,7 @@ EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_SINGLE, "R1") \ EM(NETFS_READ_FOR_WRITE, "RW") \ + EM(NETFS_UNBUFFERED_READ, "UR") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITEBACK_SINGLE, "W1") \ @@ -128,17 +130,15 @@ #define netfs_rreq_ref_traces \ EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ - EM(netfs_rreq_trace_get_work, "GET WORK ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \ EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ 
EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ - EM(netfs_rreq_trace_put_work, "PUT WORK ") \ - EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ - EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ + EM(netfs_rreq_trace_put_work_ip, "PUT WORK IP ") \ EM(netfs_rreq_trace_see_work, "SEE WORK ") \ + EM(netfs_rreq_trace_see_work_complete, "SEE WORK CP") \ E_(netfs_rreq_trace_new, "NEW ") #define netfs_sreq_ref_traces \ diff --git a/include/uapi/cxl/features.h b/include/uapi/cxl/features.h index d6db8984889fa6..490606d7694b97 100644 --- a/include/uapi/cxl/features.h +++ b/include/uapi/cxl/features.h @@ -8,10 +8,19 @@ #define _UAPI_CXL_FEATURES_H_ #include -#ifndef __KERNEL__ -#include -#else + +typedef unsigned char __uapi_uuid_t[16]; + +#ifdef __KERNEL__ #include +/* + * Note, __uapi_uuid_t is 1-byte aligned on modern compilers and 4-byte + * aligned on others. Ensure that __uapi_uuid_t in a struct is placed at + * a 4-byte aligned offset, or the structure is packed, to ensure + * consistent padding. 
+ */ +static_assert(sizeof(__uapi_uuid_t) == sizeof(uuid_t)); +#define __uapi_uuid_t uuid_t #endif /* @@ -60,7 +69,7 @@ struct cxl_mbox_get_sup_feats_in { * Get Supported Features Supported Feature Entry */ struct cxl_feat_entry { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 id; __le16 get_feat_size; __le16 set_feat_size; @@ -110,7 +119,7 @@ struct cxl_mbox_get_sup_feats_out { * CXL spec r3.2 section 8.2.9.6.2 Table 8-99 */ struct cxl_mbox_get_feat_in { - uuid_t uuid; + __uapi_uuid_t uuid; __le16 offset; __le16 count; __u8 selection; @@ -143,7 +152,7 @@ enum cxl_get_feat_selection { */ struct cxl_mbox_set_feat_in { __struct_group(cxl_mbox_set_feat_hdr, hdr, /* no attrs */, - uuid_t uuid; + __uapi_uuid_t uuid; __le32 flags; __le16 offset; __u8 version; diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 746c43bd3eb698..2f24103f45339b 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -147,7 +147,7 @@ struct drm_ivpu_param { * platform type when executing on a simulator or emulator (read-only) * * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: - * Current PLL frequency (read-only) + * Maximum frequency of the NPU data processing unit clock (read-only) * * %DRM_IVPU_PARAM_NUM_CONTEXTS: * Maximum number of simultaneously existing contexts (read-only) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 616916985e3f30..5e5442d03f8789 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1206,6 +1206,11 @@ struct drm_xe_vm_bind { * there is no need to explicitly set that. When a queue of type * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. 
+ * The user is expected to query the PXP status via the query ioctl (see + * %DRM_XE_DEVICE_QUERY_PXP_STATUS) and to wait for PXP to be ready before + * attempting to create a queue with this property. When a queue is created + * before PXP is ready, the ioctl will return -EBUSY if init is still in + * progress or -EIO if init failed. * Given that going into a power-saving state kills PXP HWDRM sessions, * runtime PM will be blocked while queues of this type are alive. * All PXP queues will be killed if a PXP invalidation event occurs. diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e10679d..a04afef9efca42 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae677849f..fe5df2a9fe8ee6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2051,7 +2051,8 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. 
* * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -4968,6 +4969,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -6065,6 +6069,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index fe24c3459ac0f2..30c8dad6214e9a 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -31,11 +31,6 @@ enum ethtool_header_flags { ETHTOOL_FLAG_STATS = 4, }; -enum { - ETHTOOL_PHY_UPSTREAM_TYPE_MAC, - ETHTOOL_PHY_UPSTREAM_TYPE_PHY, -}; - enum ethtool_tcp_data_split { ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ed2beb4def3f6a..8f1fc12bac4620 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -1010,7 +1010,9 @@ struct io_uring_zcrx_ifq_reg { __u64 region_ptr; /* struct io_uring_region_desc * */ struct io_uring_zcrx_offsets offsets; - __u64 __resv[4]; + __u32 zcrx_id; + __u32 __resv2; + __u64 __resv[3]; }; #ifdef __cplusplus diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d9d0cb827117db..f030adc462ee74 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -53,43 +53,70 @@ struct landlock_ruleset_attr { __u64 scoped; }; -/* - * 
sys_landlock_create_ruleset() flags: +/** + * DOC: landlock_create_ruleset_flags + * + * **Flags** * - * - %LANDLOCK_CREATE_RULESET_VERSION: Get the highest supported Landlock ABI - * version. - * - %LANDLOCK_CREATE_RULESET_ERRATA: Get a bitmask of fixed issues. + * %LANDLOCK_CREATE_RULESET_VERSION + * Get the highest supported Landlock ABI version (starting at 1). + * + * %LANDLOCK_CREATE_RULESET_ERRATA + * Get a bitmask of fixed issues for the current Landlock ABI version. */ /* clang-format off */ #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0) #define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) /* clang-format on */ -/* - * sys_landlock_restrict_self() flags: - * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF: Do not create any log related to the - * enforced restrictions. This should only be set by tools launching unknown - * or untrusted programs (e.g. a sandbox tool, container runtime, system - * service manager). Because programs sandboxing themselves should fix any - * denied access, they should not set this flag to be aware of potential - * issues reported by system's logs (i.e. audit). - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON: Explicitly ask to continue - * logging denied access requests even after an :manpage:`execve(2)` call. - * This flag should only be set if all the programs than can legitimately be - * executed will not try to request a denied access (which could spam audit - * logs). - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF: Do not create any log related - * to the enforced restrictions coming from future nested domains created by - * the caller or its descendants. This should only be set according to a - * runtime configuration (i.e. not hardcoded) by programs launching other - * unknown or untrusted programs that may create their own Landlock domains - * and spam logs. The main use case is for container runtimes to enable users - * to mute buggy sandboxed programs for a specific container image. 
Other use - * cases include sandboxer tools and init systems. Unlike - * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF, - * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF does not impact the requested - * restriction (if any) but only the future nested domains. +/** + * DOC: landlock_restrict_self_flags + * + * **Flags** + * + * By default, denied accesses originating from programs that sandbox themselves + * are logged via the audit subsystem. Such events typically indicate unexpected + * behavior, such as bugs or exploitation attempts. However, to avoid excessive + * logging, access requests denied by a domain not created by the originating + * program are not logged by default. The rationale is that programs should know + * their own behavior, but not necessarily the behavior of other programs. This + * default configuration is suitable for most programs that sandbox themselves. + * For specific use cases, the following flags allow programs to modify this + * default logging behavior. + * + * The %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF and + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON flags apply to the newly created + * Landlock domain. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * Disables logging of denied accesses originating from the thread creating + * the Landlock domain, as well as its children, as long as they continue + * running the same executable code (i.e., without an intervening + * :manpage:`execve(2)` call). This is intended for programs that execute + * unknown code without invoking :manpage:`execve(2)`, such as script + * interpreters. Programs that only sandbox themselves should not set this + * flag, so users can be notified of unauthorized access attempts via system + * logs. + * + * %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * Enables logging of denied accesses after an :manpage:`execve(2)` call, + * providing visibility into unauthorized access attempts by newly executed + * programs within the created Landlock domain. 
This flag is recommended + * only when all potential executables in the domain are expected to comply + * with the access restrictions, as excessive audit log entries could make + * it more difficult to identify critical events. + * + * %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * Disables logging of denied accesses originating from nested Landlock + * domains created by the caller or its descendants. This flag should be set + * according to runtime configuration, not hardcoded, to avoid suppressing + * important security events. It is useful for container runtimes or + * sandboxing tools that may launch programs which themselves create + * Landlock domains and could otherwise generate excessive logs. Unlike + * ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, this flag only affects + * future nested domains, not the one being created. It can also be used + * with a @ruleset_fd value of -1 to mute subdomain logs without creating a + * domain. */ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 95762232e01863..5929030d4e8b06 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 15 +#define TASKSTATS_VERSION 16 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -72,8 +72,6 @@ struct taskstats { */ __u64 cpu_count __attribute__((aligned(8))); __u64 cpu_delay_total; - __u64 cpu_delay_max; - __u64 cpu_delay_min; /* Following four fields atomically updated using task->delays->lock */ @@ -82,14 +80,10 @@ struct taskstats { */ __u64 blkio_count; __u64 blkio_delay_total; - __u64 blkio_delay_max; - __u64 blkio_delay_min; /* Delay waiting for page fault I/O (swap in only) */ __u64 swapin_count; __u64 swapin_delay_total; - __u64 swapin_delay_max; - __u64 swapin_delay_min; /* cpu "wall-clock" running time * On some architectures, value will adjust for cpu time 
stolen @@ -172,14 +166,11 @@ struct taskstats { /* Delay waiting for memory reclaim */ __u64 freepages_count; __u64 freepages_delay_total; - __u64 freepages_delay_max; - __u64 freepages_delay_min; + /* Delay waiting for thrashing page */ __u64 thrashing_count; __u64 thrashing_delay_total; - __u64 thrashing_delay_max; - __u64 thrashing_delay_min; /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ @@ -187,8 +178,6 @@ struct taskstats { /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; - __u64 compact_delay_max; - __u64 compact_delay_min; /* v12 begin */ __u32 ac_tgid; /* thread group ID */ @@ -210,15 +199,37 @@ struct taskstats { /* v13: Delay waiting for write-protect copy */ __u64 wpcopy_count; __u64 wpcopy_delay_total; - __u64 wpcopy_delay_max; - __u64 wpcopy_delay_min; /* v14: Delay waiting for IRQ/SOFTIRQ */ __u64 irq_count; __u64 irq_delay_total; - __u64 irq_delay_max; - __u64 irq_delay_min; - /* v15: add Delay max */ + + /* v15: add Delay max and Delay min */ + + /* v16: move Delay max and Delay min to the end of taskstat */ + __u64 cpu_delay_max; + __u64 cpu_delay_min; + + __u64 blkio_delay_max; + __u64 blkio_delay_min; + + __u64 swapin_delay_max; + __u64 swapin_delay_min; + + __u64 freepages_delay_max; + __u64 freepages_delay_min; + + __u64 thrashing_delay_max; + __u64 thrashing_delay_min; + + __u64 compact_delay_max; + __u64 compact_delay_min; + + __u64 wpcopy_delay_max; + __u64 wpcopy_delay_min; + + __u64 irq_delay_max; + __u64 irq_delay_min; }; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index b95dd84eef2db2..d4b3e2ae1314d1 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. 
*/ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 8549d457125714..c691ac210ce2ef 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 8a24ed59ec46f0..1c47136d8715f3 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -180,7 +180,8 @@ enum attr_idn { QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE = 0x1F, - QUERY_ATTR_IDN_TIMESTAMP = 0x30 + QUERY_ATTR_IDN_TIMESTAMP = 0x30, + QUERY_ATTR_IDN_DEV_LVL_EXCEPTION_ID = 0x34, }; /* Descriptor idn for Query requests */ @@ -390,6 +391,7 @@ enum { UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), + UFS_DEV_LVL_EXCEPTION_SUP = BIT(12), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 @@ -419,6 +421,7 @@ enum { MASK_EE_TOO_LOW_TEMP = BIT(4), MASK_EE_WRITEBOOSTER_EVENT = BIT(5), MASK_EE_PERFORMANCE_THROTTLING = BIT(6), + MASK_EE_DEV_LVL_EXCEPTION = BIT(7), MASK_EE_HEALTH_CRITICAL = BIT(9), }; #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) diff --git a/include/ufs/ufs_quirks.h b/include/ufs/ufs_quirks.h index 41ff44dfa1db3f..f52de5ed1b3b6e 100644 --- a/include/ufs/ufs_quirks.h +++ b/include/ufs/ufs_quirks.h @@ -107,4 +107,10 @@ struct ufs_dev_quirk { 
*/ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some ufs devices may need more time to be in hibern8 before exiting. + * Enable this quirk to give it an additional 100us. + */ +#define UFS_DEVICE_QUIRK_PA_HIBER8TIME (1 << 12) + #endif /* UFS_QUIRKS_H_ */ diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2a8..e928ed0265ffc8 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -246,7 +246,7 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion complete; struct ufs_query query; }; @@ -968,6 +968,9 @@ enum ufshcd_mcq_opr { * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled * @critical_health_count: count of critical health exceptions + * @dev_lvl_exception_count: count of device level exceptions since last reset + * @dev_lvl_exception_id: vendor specific information about the + * device level exception event. */ struct ufs_hba { void __iomem *mmio_base; @@ -1138,6 +1141,8 @@ struct ufs_hba { bool pm_qos_enabled; int critical_health_count; + atomic_t dev_lvl_exception_count; + u64 dev_lvl_exception_id; }; /** diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h index eee3d2a4dbe4d3..ff0c06b6513eff 100644 --- a/include/vdso/unaligned.h +++ b/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } 
while (0) #endif /* __VDSO_UNALIGNED_H */ diff --git a/init/Kconfig b/init/Kconfig index dd2ea3b9a79920..c524858118d27e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,11 +87,6 @@ config CC_CAN_LINK default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) -config CC_CAN_LINK_STATIC - bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) - # Fixed in GCC 14, 13.3, 12.4 and 11.5 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 config GCC_ASM_GOTO_OUTPUT_BROKEN @@ -140,6 +135,9 @@ config LD_CAN_USE_KEEP_IN_OVERLAY config RUSTC_HAS_COERCE_POINTEE def_bool RUSTC_VERSION >= 108400 +config RUSTC_HAS_UNNECESSARY_TRANSMUTES + def_bool RUSTC_VERSION >= 108800 + config PAHOLE_VERSION int default $(shell,$(srctree)/scripts/pahole-version.sh $(PAHOLE)) @@ -165,6 +163,10 @@ config THREAD_INFO_IN_TASK menu "General setup" +config CACHY + bool "Some kernel tweaks by CachyOS" + default y + config BROKEN bool @@ -1332,6 +1334,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. 
+ config PID_NS bool "PID Namespaces" default y @@ -1481,6 +1499,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE with the "-O2" compiler flag for best performance and most helpful compile-time warnings. +config CC_OPTIMIZE_FOR_PERFORMANCE_O3 + bool "Optimize more for performance (-O3)" + help + Choosing this option will pass "-O3" to your compiler to optimize + the kernel yet more for performance. + config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size (-Os)" help @@ -1555,6 +1579,16 @@ config SYSCTL_ARCH_UNALIGN_ALLOW the unaligned access emulation. see arch/parisc/kernel/unaligned.c for reference +config SYSFS_SYSCALL + bool "Sysfs syscall support" + default n + help + sys_sysfs is an obsolete system call no longer supported in libc. + Note that disabling this option is more secure but might break + compatibility with some systems. + + If unsure say N here. + config HAVE_PCSPKR_PLATFORM bool @@ -1599,16 +1633,6 @@ config SGETMASK_SYSCALL If unsure, leave the default option here. -config SYSFS_SYSCALL - bool "Sysfs syscall support" if EXPERT - default y - help - sys_sysfs is an obsolete system call no longer supported in libc. - Note that disabling this option is more secure but might break - compatibility with some systems. - - If unsure say Y here. - config FHANDLE bool "open by fhandle syscalls" if EXPERT select EXPORTFS diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index f60d0a9d505e25..f948917f7f7071 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -86,13 +86,8 @@ static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, } #endif -/* - * Caller holds a reference to the file already, we don't need to do - * anything else to get an extra reference. 
- */ -__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) +static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { - struct io_ring_ctx *ctx = file->private_data; struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; struct rusage sq_usage; @@ -106,7 +101,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) unsigned int sq_entries, cq_entries; int sq_pid = -1, sq_cpu = -1; u64 sq_total_time = 0, sq_work_time = 0; - bool has_lock; unsigned int i; if (ctx->flags & IORING_SETUP_CQE32) @@ -123,11 +117,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { @@ -176,28 +170,28 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "\n"); } - /* - * Avoid ABBA deadlock between the seq lock and the io_uring mutex, - * since fdinfo case grabs it in the opposite direction of normal use - * cases. If we fail to get the lock, we just don't iterate any - * structures that could be going away outside the io_uring mutex. 
- */ - has_lock = mutex_trylock(&ctx->uring_lock); - - if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { + if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; + struct task_struct *tsk; + rcu_read_lock(); + tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. */ - if (sq->thread) { + if (tsk) { + get_task_struct(tsk); + rcu_read_unlock(); + getrusage(tsk, RUSAGE_SELF, &sq_usage); + put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec); sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); } } @@ -206,7 +200,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time); seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time); seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr); - for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) { + for (i = 0; i < ctx->file_table.data.nr; i++) { struct file *f = NULL; if (ctx->file_table.data.nodes[i]) @@ -218,7 +212,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } } seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr); - for (i = 0; has_lock && i < ctx->buf_table.nr; i++) { + for (i = 0; i < ctx->buf_table.nr; i++) { struct io_mapped_ubuf *buf = NULL; if (ctx->buf_table.nodes[i]) @@ -228,7 +222,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) else seq_printf(m, "%5u: \n", i); } - if (has_lock && !xa_empty(&ctx->personalities)) { + if (!xa_empty(&ctx->personalities)) { unsigned long index; const struct cred *cred; @@ -238,7 +232,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) } seq_puts(m, "PollList:\n"); - for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) { + for (i = 0; i < (1U << ctx->cancel_table.hash_bits); 
i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; struct io_kiocb *req; @@ -247,9 +241,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) task_work_pending(req->tctx->task)); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); - seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { @@ -262,4 +253,23 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) spin_unlock(&ctx->completion_lock); napi_show_fdinfo(ctx, m); } + +/* + * Caller holds a reference to the file already, we don't need to do + * anything else to get an extra reference. + */ +__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) +{ + struct io_ring_ctx *ctx = file->private_data; + + /* + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, + * since fdinfo case grabs it in the opposite direction of normal use + * cases. + */ + if (mutex_trylock(&ctx->uring_lock)) { + __io_uring_show_fdinfo(ctx, m); + mutex_unlock(&ctx->uring_lock); + } +} #endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c6209fe44cb175..e5466f65682699 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -448,24 +448,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) return req->link; } -static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) - return NULL; - return __io_prep_linked_timeout(req); -} - -static noinline void __io_arm_ltimeout(struct io_kiocb *req) -{ - io_queue_linked_timeout(__io_prep_linked_timeout(req)); -} - -static inline void io_arm_ltimeout(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) - __io_arm_ltimeout(req); -} - static void io_prep_async_work(struct io_kiocb *req) { const struct io_issue_def *def = &io_issue_defs[req->opcode]; @@ -518,7 +500,6 @@ static void io_prep_async_link(struct io_kiocb *req) 
static void io_queue_iowq(struct io_kiocb *req) { - struct io_kiocb *link = io_prep_linked_timeout(req); struct io_uring_task *tctx = req->tctx; BUG_ON(!tctx); @@ -543,8 +524,6 @@ static void io_queue_iowq(struct io_kiocb *req) trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); io_wq_enqueue(tctx->io_wq, &req->work); - if (link) - io_queue_linked_timeout(link); } static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw) @@ -558,18 +537,30 @@ void io_req_queue_iowq(struct io_kiocb *req) io_req_task_work_add(req); } +static bool io_drain_defer_seq(struct io_kiocb *req, u32 seq) +{ + struct io_ring_ctx *ctx = req->ctx; + + return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; +} + static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx) { + bool drain_seen = false, first = true; + spin_lock(&ctx->completion_lock); while (!list_empty(&ctx->defer_list)) { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); - if (req_need_defer(de->req, de->seq)) + drain_seen |= de->req->flags & REQ_F_IO_DRAIN; + if ((drain_seen || first) && io_drain_defer_seq(de->req, de->seq)) break; + list_del_init(&de->list); io_req_task_queue(de->req); kfree(de); + first = false; } spin_unlock(&ctx->completion_lock); } @@ -657,6 +648,7 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool dying) * to care for a non-real case. */ if (need_resched()) { + ctx->cqe_sentinel = ctx->cqe_cached; io_cq_unlock_post(ctx); mutex_unlock(&ctx->uring_lock); cond_resched(); @@ -869,13 +861,26 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) struct io_ring_ctx *ctx = req->ctx; bool posted; + /* + * If multishot has already posted deferred completions, ensure that + * those are flushed first before posting this one. If not, CQEs + * could get reordered. 
+ */ + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) + __io_submit_flush_completions(ctx); + lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - __io_cq_lock(ctx); - posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + if (!ctx->lockless_cq) { + spin_lock(&ctx->completion_lock); + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + spin_unlock(&ctx->completion_lock); + } else { + posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); + } + ctx->submit_state.cq_flush = true; - __io_cq_unlock_post(ctx); return posted; } @@ -1078,21 +1083,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync) while (node) { req = container_of(node, struct io_kiocb, io_task_work.node); node = node->next; - if (sync && last_ctx != req->ctx) { + if (last_ctx != req->ctx) { if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } last_ctx = req->ctx; percpu_ref_get(&last_ctx->refs); } - if (llist_add(&req->io_task_work.node, - &req->ctx->fallback_llist)) - schedule_delayed_work(&req->ctx->fallback_work, 1); + if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) + schedule_delayed_work(&last_ctx->fallback_work, 1); } if (last_ctx) { - flush_delayed_work(&last_ctx->fallback_work); + if (sync) + flush_delayed_work(&last_ctx->fallback_work); percpu_ref_put(&last_ctx->refs); } } @@ -1718,15 +1724,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, return !!req->file; } +#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) + static inline int __io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags, const struct io_issue_def *def) { const struct cred *creds = NULL; + struct io_kiocb *link = NULL; int ret; - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); + if 
(unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { + if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) + creds = override_creds(req->creds); + if (req->flags & REQ_F_ARM_LTIMEOUT) + link = __io_prep_linked_timeout(req); + } if (!def->audit_skip) audit_uring_entry(req->opcode); @@ -1736,8 +1749,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req, if (!def->audit_skip) audit_uring_exit(!ret, ret); - if (creds) - revert_creds(creds); + if (unlikely(creds || link)) { + if (creds) + revert_creds(creds); + if (link) + io_queue_linked_timeout(link); + } return ret; } @@ -1763,7 +1780,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) if (ret == IOU_ISSUE_SKIP_COMPLETE) { ret = 0; - io_arm_ltimeout(req); /* If the op doesn't have a file, we're not polling for it */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) @@ -1818,8 +1834,6 @@ void io_wq_submit_work(struct io_wq_work *work) else req_ref_get(req); - io_arm_ltimeout(req); - /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { fail: @@ -1935,15 +1949,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd) static void io_queue_async(struct io_kiocb *req, int ret) __must_hold(&req->ctx->uring_lock) { - struct io_kiocb *linked_timeout; - if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { io_req_defer_failed(req, ret); return; } - linked_timeout = io_prep_linked_timeout(req); - switch (io_arm_poll_handler(req, 0)) { case IO_APOLL_READY: io_kbuf_recycle(req, 0); @@ -1956,9 +1966,6 @@ static void io_queue_async(struct io_kiocb *req, int ret) case IO_APOLL_OK: break; } - - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); } static inline void io_queue_sqe(struct io_kiocb *req) @@ -2906,7 +2913,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct task_struct *tsk; io_sq_thread_park(sqd); - tsk = sqd->thread; + tsk = sqpoll_task_locked(sqd); if (tsk 
&& tsk->io_uring && tsk->io_uring->io_wq) io_wq_cancel_cb(tsk->io_uring->io_wq, io_cancel_ctx_cb, ctx, true); @@ -3143,7 +3150,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) s64 inflight; DEFINE_WAIT(wait); - WARN_ON_ONCE(sqd && sqd->thread != current); + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); if (!current->io_uring) return; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 09810925967138..953d5e74256916 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -504,6 +504,8 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe p->nbufs = tmp; p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); + if (!p->len) + return -EINVAL; if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, &size)) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 76fcc79656b002..07f8a5cbd37ec7 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -116,7 +116,7 @@ static int io_region_init_ptr(struct io_mapped_region *mr) void *ptr; if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) { - if (ifd.nr_folios == 1) { + if (ifd.nr_folios == 1 && !PageHighMem(mr->pages[0])) { mr->ptr = page_address(mr->pages[0]); return 0; } diff --git a/io_uring/net.c b/io_uring/net.c index 24040bc3916a1b..27f37fa2ef7936 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -827,18 +827,24 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, cflags |= IORING_CQE_F_SOCK_NONEMPTY; if (sr->flags & IORING_RECVSEND_BUNDLE) { - cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), + size_t this_ret = *ret - sr->done_io; + + cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); if (sr->retry) cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) goto finish; - /* if more is available, retry and append to this one */ - if (!sr->retry && kmsg->msg.msg_inq > 0 && 
*ret > 0) { + /* + * If more is available AND it was a full transfer, retry and + * append to this one + */ + if (!sr->retry && kmsg->msg.msg_inq > 0 && this_ret > 0 && + !iov_iter_count(&kmsg->msg.msg_iter)) { req->cqe.flags = cflags & ~CQE_F_MASK; sr->len = kmsg->msg.msg_inq; - sr->done_io += *ret; + sr->done_io += this_ret; sr->retry = true; return false; } diff --git a/io_uring/register.c b/io_uring/register.c index cc23a4c205cd43..a59589249fce7a 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + struct task_struct *tsk; + /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold @@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; + tsk = sqpoll_task_locked(sqd); + if (tsk) + tctx = tsk->io_uring; } } else { tctx = current->io_uring; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 5e64a8bb30a451..f80a77c4973f30 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -175,6 +175,18 @@ void io_rsrc_cache_free(struct io_ring_ctx *ctx) io_alloc_cache_free(&ctx->imu_cache, kfree); } +static void io_clear_table_tags(struct io_rsrc_data *data) +{ + int i; + + for (i = 0; i < data->nr; i++) { + struct io_rsrc_node *node = data->nodes[i]; + + if (node) + node->tag = 0; + } +} + __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data) { @@ -583,6 +595,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, io_file_table_set_alloc_range(ctx, 0, ctx->file_table.data.nr); return 0; fail: + io_clear_table_tags(&ctx->file_table.data); io_sqe_files_unregister(ctx); return ret; } @@ -902,8 +915,10 @@ int io_sqe_buffers_register(struct 
io_ring_ctx *ctx, void __user *arg, } ctx->buf_table = data; - if (ret) + if (ret) { + io_clear_table_tags(&ctx->buf_table); io_sqe_buffers_unregister(ctx); + } return ret; } @@ -1017,10 +1032,33 @@ static int validate_fixed_range(u64 buf_addr, size_t len, return 0; } +static int io_import_kbuf(int ddir, struct iov_iter *iter, + struct io_mapped_ubuf *imu, size_t len, size_t offset) +{ + size_t count = len + offset; + + iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, count); + iov_iter_advance(iter, offset); + + if (count < imu->len) { + const struct bio_vec *bvec = iter->bvec; + + while (len > bvec->bv_len) { + len -= bvec->bv_len; + bvec++; + } + iter->nr_segs = 1 + bvec - iter->bvec; + } + return 0; +} + static int io_import_fixed(int ddir, struct iov_iter *iter, struct io_mapped_ubuf *imu, u64 buf_addr, size_t len) { + const struct bio_vec *bvec; + size_t folio_mask; + unsigned nr_segs; size_t offset; int ret; @@ -1032,56 +1070,35 @@ static int io_import_fixed(int ddir, struct iov_iter *iter, if (!(imu->dir & (1 << ddir))) return -EFAULT; - /* - * Might not be a start of buffer, set size appropriately - * and advance us to the beginning. - */ offset = buf_addr - imu->ubuf; - iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); - if (offset) { - /* - * Don't use iov_iter_advance() here, as it's really slow for - * using the latter parts of a big fixed buffer - it iterates - * over each segment manually. We can cheat a bit here for user - * registered nodes, because we know that: - * - * 1) it's a BVEC iter, we set it up - * 2) all bvecs are the same in size, except potentially the - * first and last bvec - * - * So just find our index, and adjust the iterator afterwards. - * If the offset is within the first bvec (or the whole first - * bvec, just use iov_iter_advance(). This makes it easier - * since we can just skip the first segment, which may not - * be folio_size aligned. 
- */ - const struct bio_vec *bvec = imu->bvec; - - /* - * Kernel buffer bvecs, on the other hand, don't necessarily - * have the size property of user registered ones, so we have - * to use the slow iter advance. - */ - if (offset < bvec->bv_len) { - iter->count -= offset; - iter->iov_offset = offset; - } else if (imu->is_kbuf) { - iov_iter_advance(iter, offset); - } else { - unsigned long seg_skip; + if (imu->is_kbuf) + return io_import_kbuf(ddir, iter, imu, len, offset); - /* skip first vec */ - offset -= bvec->bv_len; - seg_skip = 1 + (offset >> imu->folio_shift); + /* + * Don't use iov_iter_advance() here, as it's really slow for + * using the latter parts of a big fixed buffer - it iterates + * over each segment manually. We can cheat a bit here for user + * registered nodes, because we know that: + * + * 1) it's a BVEC iter, we set it up + * 2) all bvecs are the same in size, except potentially the + * first and last bvec + */ + folio_mask = (1UL << imu->folio_shift) - 1; + bvec = imu->bvec; + if (offset >= bvec->bv_len) { + unsigned long seg_skip; - iter->bvec += seg_skip; - iter->nr_segs -= seg_skip; - iter->count -= bvec->bv_len + offset; - iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1); - } + /* skip first vec */ + offset -= bvec->bv_len; + seg_skip = 1 + (offset >> imu->folio_shift); + bvec += seg_skip; + offset &= folio_mask; } - + nr_segs = (offset + len + bvec->bv_offset + folio_mask) >> imu->folio_shift; + iov_iter_bvec(iter, ddir, bvec, nr_segs, len); + iter->iov_offset = offset; return 0; } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index d037cc68e9d3ea..268d2fbe6160c2 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -20,7 +20,7 @@ #include "sqpoll.h" #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 -#define IORING_TW_CAP_ENTRIES_VALUE 8 +#define IORING_TW_CAP_ENTRIES_VALUE 32 enum { IO_SQ_THREAD_SHOULD_STOP = 0, @@ -30,7 +30,7 @@ enum { void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - 
WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); /* * Do the dance but not conditional clear_bit() because it'd race with @@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(data_race(sqd->thread) == current); + struct task_struct *tsk; atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } } void io_sq_thread_stop(struct io_sq_data *sqd) { - WARN_ON_ONCE(sqd->thread == current); + struct task_struct *tsk; + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -270,7 +278,8 @@ static int io_sq_thread(void *data) /* offload context creation failed, just exit */ if (!current->io_uring) { mutex_lock(&sqd->lock); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); mutex_unlock(&sqd->lock); goto err_out; } @@ -379,7 +388,8 @@ static int io_sq_thread(void *data) io_sq_tw(&retry_list, UINT_MAX); io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); @@ -484,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err_sqpoll; } - sqd->thread = tsk; + mutex_lock(&sqd->lock); + rcu_assign_pointer(sqd->thread, tsk); + mutex_unlock(&sqd->lock); + task_to_put = 
get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); @@ -495,9 +508,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; goto err; } - - if (task_to_put) - put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); @@ -515,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, int ret = -EINVAL; if (sqd) { + struct task_struct *tsk; + io_sq_thread_park(sqd); /* Don't set affinity for a dying thread */ - if (sqd->thread) - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); + tsk = sqpoll_task_locked(sqd); + if (tsk) + ret = io_wq_cpu_affinity(tsk->io_uring, mask); io_sq_thread_unpark(sqd); } diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index 4171666b1cf4cc..b83dcdec9765fd 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -8,7 +8,7 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct task_struct *thread; + struct task_struct __rcu *thread; struct wait_queue_head wait; unsigned sq_thread_idle; @@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); + +static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) +{ + return rcu_dereference_protected(sqd->thread, + lockdep_is_held(&sqd->lock)); +} diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index a9ea7d29cdd978..430ed620ddfe01 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -254,6 +254,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) return -EOPNOTSUPP; issue_flags |= IO_URING_F_IOPOLL; req->iopoll_completed = 0; + if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { + /* make sure every req only blocks once */ + req->flags &= ~REQ_F_IOPOLL_STATE; + req->iopoll_start = ktime_get_ns(); + } } ret = 
file->f_op->uring_cmd(ioucmd, issue_flags); diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index b04686b6b5d249..e6a5142c890ea6 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -17,9 +17,3 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all); void io_cmd_cache_free(const void *entry); - -int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, - const struct iovec __user *uvec, - size_t uvec_segs, - int ddir, struct iov_iter *iter, - unsigned issue_flags); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 80d4a6f71d2931..fe86606b9f304d 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -26,6 +26,11 @@ #include "zcrx.h" #include "rsrc.h" +static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp) +{ + return pp->mp_priv; +} + #define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, @@ -46,14 +51,21 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs); + area->is_mapped = false; } static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { int i; + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) + return 0; + for (i = 0; i < area->nia.num_niovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; dma_addr_t dma; @@ -181,7 +193,7 @@ static void io_zcrx_free_area(struct io_zcrx_area *area) kvfree(area->nia.niovs); kvfree(area->user_refs); if (area->pages) { - unpin_user_pages(area->pages, area->nia.num_niovs); + unpin_user_pages(area->pages, area->nr_folios); kvfree(area->pages); } kfree(area); @@ -192,7 +204,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, struct io_uring_zcrx_area_reg *area_reg) { struct io_zcrx_area *area; - int i, ret, nr_pages; + int i, ret, 
nr_pages, nr_iovs; struct iovec iov; if (area_reg->flags || area_reg->rq_area_token) @@ -220,27 +232,28 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, area->pages = NULL; goto err; } - area->nia.num_niovs = nr_pages; + area->nr_folios = nr_iovs = nr_pages; + area->nia.num_niovs = nr_iovs; - area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]), + area->nia.niovs = kvmalloc_array(nr_iovs, sizeof(area->nia.niovs[0]), GFP_KERNEL | __GFP_ZERO); if (!area->nia.niovs) goto err; - area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]), + area->freelist = kvmalloc_array(nr_iovs, sizeof(area->freelist[0]), GFP_KERNEL | __GFP_ZERO); if (!area->freelist) goto err; - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_iovs; i++) area->freelist[i] = i; - area->user_refs = kvmalloc_array(nr_pages, sizeof(area->user_refs[0]), + area->user_refs = kvmalloc_array(nr_iovs, sizeof(area->user_refs[0]), GFP_KERNEL | __GFP_ZERO); if (!area->user_refs) goto err; - for (i = 0; i < nr_pages; i++) { + for (i = 0; i < nr_iovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; niov->owner = &area->nia; @@ -248,7 +261,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, atomic_set(&area->user_refs[i], 0); } - area->free_count = nr_pages; + area->free_count = nr_iovs; area->ifq = ifq; /* we're only supporting one area per ifq for now */ area->area_id = 0; @@ -274,6 +287,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) ifq->ctx = ctx; spin_lock_init(&ifq->lock); spin_lock_init(&ifq->rq_lock); + mutex_init(&ifq->dma_lock); return ifq; } @@ -323,6 +337,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) put_device(ifq->dev); io_free_rbuf_ring(ifq); + mutex_destroy(&ifq->dma_lock); kfree(ifq); } @@ -353,7 +368,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, return -EFAULT; if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd))) return -EFAULT; - if (memchr_inv(®.__resv, 0, sizeof(reg.__resv))) + 
if (memchr_inv(®.__resv, 0, sizeof(reg.__resv)) || + reg.__resv2 || reg.zcrx_id) return -EINVAL; if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags) return -EINVAL; @@ -393,10 +409,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; get_device(ifq->dev); - ret = io_zcrx_map_area(ifq, ifq->area); - if (ret) - goto err; - mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param); @@ -584,7 +596,7 @@ static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq) static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); /* pp should already be ensuring that */ if (unlikely(pp->alloc.count)) @@ -616,7 +628,8 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); + int ret; if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -629,13 +642,17 @@ static int io_pp_zc_init(struct page_pool *pp) if (pp->p.dma_dir != DMA_FROM_DEVICE) return -EOPNOTSUPP; + ret = io_zcrx_map_area(ifq, ifq->area); + if (ret) + return ret; + percpu_ref_get(&ifq->ctx->refs); return 0; } static void io_pp_zc_destroy(struct page_pool *pp) { - struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); struct io_zcrx_area *area = ifq->area; if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) @@ -664,6 +681,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq) struct io_zcrx_ifq *ifq = mp_priv; io_zcrx_drop_netdev(ifq); + if (ifq->area) + io_zcrx_unmap_area(ifq, ifq->area); + p->mp_ops = NULL; p->mp_priv = NULL; } @@ -790,7 +810,7 @@ static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, niov = netmem_to_net_iov(frag->netmem); if (niov->pp->mp_ops != &io_uring_pp_zc_ops || - 
niov->pp->mp_priv != ifq) + io_pp_to_ifq(niov->pp) != ifq) return -EFAULT; if (!io_zcrx_queue_cqe(req, niov, ifq, off + skb_frag_off(frag), len)) diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 706cc7300780d6..f2bc811f022c67 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -15,6 +15,7 @@ struct io_zcrx_area { bool is_mapped; u16 area_id; struct page **pages; + unsigned long nr_folios; /* freelist */ spinlock_t freelist_lock ____cacheline_aligned_in_smp; @@ -26,17 +27,18 @@ struct io_zcrx_ifq { struct io_ring_ctx *ctx; struct io_zcrx_area *area; + spinlock_t rq_lock ____cacheline_aligned_in_smp; struct io_uring *rq_ring; struct io_uring_zcrx_rqe *rqes; - u32 rq_entries; u32 cached_rq_head; - spinlock_t rq_lock; + u32 rq_entries; u32 if_rxq; struct device *dev; struct net_device *netdev; netdevice_tracker netdev_tracker; spinlock_t lock; + struct mutex dma_lock; }; #if defined(CONFIG_IO_URING_ZCRX) diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index ce1435cb08b1ec..e1359db5561e67 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -40,6 +40,27 @@ choice on SMP and NUMA systems and exactly dividing by both PAL and NTSC frame rates for video and multimedia work. + config HZ_500 + bool "500 HZ" + help + 500 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_600 + bool "600 HZ" + help + 600 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. + + config HZ_750 + bool "750 HZ" + help + 750 Hz is a balanced timer frequency. Provides fast interactivity + on desktops with good smoothness without increasing CPU power + consumption and sacrificing the battery life on laptops. 
+ config HZ_1000 bool "1000 HZ" help @@ -53,6 +74,9 @@ config HZ default 100 if HZ_100 default 250 if HZ_250 default 300 if HZ_300 + default 500 if HZ_500 + default 600 if HZ_600 + default 750 if HZ_750 default 1000 if HZ_1000 config SCHED_HRTICK diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 54ea59ff8fbeb6..18f87e0dd137be 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -88,7 +88,7 @@ endchoice config PREEMPT_RT bool "Fully Preemptible Kernel (Real-Time)" - depends on EXPERT && ARCH_SUPPORTS_RT && !COMPILE_TEST + depends on ARCH_SUPPORTS_RT && !COMPILE_TEST select PREEMPTION help This option turns the kernel into a real-time kernel by replacing diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 367eaf2c78b7ed..0ebbbe37a60f02 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -347,12 +347,17 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. */ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct dentry *d = kern_path_locked(watch->path, parent); + struct dentry *d; + + d = kern_path_locked_negative(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - /* update watch filter fields */ - watch->dev = d->d_sb->s_dev; - watch->ino = d_backing_inode(d)->i_ino; + + if (d_is_positive(d)) { + /* update watch filter fields */ + watch->dev = d->d_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; + } inode_unlock(d_backing_inode(parent->dentry)); dput(d); @@ -418,11 +423,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret && ret != -ENOENT) { + if (ret) { audit_put_watch(watch); return ret; } - ret = 0; /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry)); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 
ba6b6118cf5040..c20babbf998f4e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2358,8 +2358,8 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, return 0; } -bool bpf_prog_map_compatible(struct bpf_map *map, - const struct bpf_prog *fp) +static bool __bpf_prog_map_compatible(struct bpf_map *map, + const struct bpf_prog *fp) { enum bpf_prog_type prog_type = resolve_prog_type(fp); bool ret; @@ -2368,14 +2368,6 @@ bool bpf_prog_map_compatible(struct bpf_map *map, if (fp->kprobe_override) return false; - /* XDP programs inserted into maps are not guaranteed to run on - * a particular netdev (and can run outside driver context entirely - * in the case of devmap and cpumap). Until device checks - * are implemented, prohibit adding dev-bound programs to program maps. - */ - if (bpf_prog_is_dev_bound(aux)) - return false; - spin_lock(&map->owner.lock); if (!map->owner.type) { /* There's no owner yet where we could check for @@ -2409,6 +2401,19 @@ bool bpf_prog_map_compatible(struct bpf_map *map, return ret; } +bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp) +{ + /* XDP programs inserted into maps are not guaranteed to run on + * a particular netdev (and can run outside driver context entirely + * in the case of devmap and cpumap). Until device checks + * are implemented, prohibit adding dev-bound programs to program maps. 
+ */ + if (bpf_prog_is_dev_bound(fp->aux)) + return false; + + return __bpf_prog_map_compatible(map, fp); +} + static int bpf_check_tail_call(const struct bpf_prog *fp) { struct bpf_prog_aux *aux = fp->aux; @@ -2421,7 +2426,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) if (!map_type_contains_progs(map)) continue; - if (!bpf_prog_map_compatible(map, fp)) { + if (!__bpf_prog_map_compatible(map, fp)) { ret = -EINVAL; goto out; } @@ -2469,7 +2474,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) /* In case of BPF to BPF calls, verifier did all the prep * work with regards to JITing, etc. */ - bool jit_needed = false; + bool jit_needed = fp->jit_requested; if (fp->bpf_func) goto finalize; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5a5adc66b8e224..92b606d600207c 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2189,7 +2189,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_ b = &htab->buckets[i]; rcu_read_lock(); head = &b->head; - hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { key = elem->key; if (is_percpu) { /* current cpu value for percpu map */ diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 2fdf3c978db1bd..774e5a5388112e 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -89,5 +89,6 @@ static void __exit fini(void) } late_initcall(load); module_exit(fini); +MODULE_IMPORT_NS("BPF_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs"); diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index d869f51ea93a0e..9a5f94371e5065 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -9,13 +9,14 @@ #include #include #include "percpu_freelist.h" +#include #define QUEUE_STACK_CREATE_FLAG_MASK \ 
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) struct bpf_queue_stack { struct bpf_map map; - raw_spinlock_t lock; + rqspinlock_t lock; u32 head, tail; u32 size; /* max_entries + 1 */ @@ -78,7 +79,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) qs->size = size; - raw_spin_lock_init(&qs->lock); + raw_res_spin_lock_init(&qs->lock); return &qs->map; } @@ -98,12 +99,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) int err = 0; void *ptr; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, flags)) + return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -120,7 +117,7 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete) } out: - raw_spin_unlock_irqrestore(&qs->lock, flags); + raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } @@ -133,12 +130,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) void *ptr; u32 index; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, flags)) + return -EBUSY; if (queue_stack_map_is_empty(qs)) { memset(value, 0, qs->map.value_size); @@ -157,7 +150,7 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete) qs->head = index; out: - raw_spin_unlock_irqrestore(&qs->lock, flags); + raw_res_spin_unlock_irqrestore(&qs->lock, flags); return err; } @@ -203,12 +196,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, if (flags & BPF_NOEXIST || flags > BPF_EXIST) return -EINVAL; - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) - return -EBUSY; - } else { - raw_spin_lock_irqsave(&qs->lock, irq_flags); - } + if (raw_res_spin_lock_irqsave(&qs->lock, irq_flags)) + return 
-EBUSY; if (queue_stack_map_is_full(qs)) { if (!replace) { @@ -227,7 +216,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value, qs->head = 0; out: - raw_spin_unlock_irqrestore(&qs->lock, irq_flags); + raw_res_spin_unlock_irqrestore(&qs->lock, irq_flags); return err; } diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 1499d8caa9a351..719d73299397b5 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -11,6 +11,7 @@ #include #include #include +#include #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE) @@ -29,7 +30,7 @@ struct bpf_ringbuf { u64 mask; struct page **pages; int nr_pages; - raw_spinlock_t spinlock ____cacheline_aligned_in_smp; + rqspinlock_t spinlock ____cacheline_aligned_in_smp; /* For user-space producer ring buffers, an atomic_t busy bit is used * to synchronize access to the ring buffers in the kernel, rather than * the spinlock that is used for kernel-producer ring buffers. This is @@ -173,7 +174,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) if (!rb) return NULL; - raw_spin_lock_init(&rb->spinlock); + raw_res_spin_lock_init(&rb->spinlock); atomic_set(&rb->busy, 0); init_waitqueue_head(&rb->waitq); init_irq_work(&rb->work, bpf_ringbuf_notify); @@ -416,12 +417,8 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) cons_pos = smp_load_acquire(&rb->consumer_pos); - if (in_nmi()) { - if (!raw_spin_trylock_irqsave(&rb->spinlock, flags)) - return NULL; - } else { - raw_spin_lock_irqsave(&rb->spinlock, flags); - } + if (raw_res_spin_lock_irqsave(&rb->spinlock, flags)) + return NULL; pend_pos = rb->pending_pos; prod_pos = rb->producer_pos; @@ -446,7 +443,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) */ if (new_prod_pos - cons_pos > rb->mask || new_prod_pos - pend_pos > rb->mask) { - raw_spin_unlock_irqrestore(&rb->spinlock, flags); + raw_res_spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } @@ -458,7 +455,7 @@ static 
void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) /* pairs with consumer's smp_load_acquire() */ smp_store_release(&rb->producer_pos, new_prod_pos); - raw_spin_unlock_irqrestore(&rb->spinlock, flags); + raw_res_spin_unlock_irqrestore(&rb->spinlock, flags); return (void *)hdr + BPF_RINGBUF_HDR_SZ; } diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c index b896c4a75a5c9b..338305c8852cf6 100644 --- a/kernel/bpf/rqspinlock.c +++ b/kernel/bpf/rqspinlock.c @@ -253,7 +253,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask, }) #else #define RES_CHECK_TIMEOUT(ts, ret, mask) \ - ({ (ret) = check_timeout(&(ts)); }) + ({ (ret) = check_timeout((lock), (mask), &(ts)); }) #endif /* diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9794446bc8c6ca..64c3393e827000 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1583,7 +1583,7 @@ struct bpf_map *bpf_map_get(u32 ufd) return map; } -EXPORT_SYMBOL(bpf_map_get); +EXPORT_SYMBOL_NS(bpf_map_get, "BPF_INTERNAL"); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { @@ -3364,7 +3364,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) bpf_link_inc(link); return link; } -EXPORT_SYMBOL(bpf_link_get_from_fd); +EXPORT_SYMBOL_NS(bpf_link_get_from_fd, "BPF_INTERNAL"); static void bpf_tracing_link_release(struct bpf_link *link) { @@ -6020,7 +6020,7 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) return ____bpf_sys_bpf(cmd, attr, size); } } -EXPORT_SYMBOL(kern_sys_bpf); +EXPORT_SYMBOL_NS(kern_sys_bpf, "BPF_INTERNAL"); static const struct bpf_func_proto bpf_sys_bpf_proto = { .func = bpf_sys_bpf, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 54c6953a8b84c2..efa70141171290 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4413,8 +4413,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * before it would be equally necessary to * propagate it to dreg. 
*/ - bt_set_reg(bt, dreg); - bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK)) + bt_set_reg(bt, sreg); + if (!hist || !(hist->flags & INSN_F_DST_REG_STACK)) + bt_set_reg(bt, dreg); } else if (BPF_SRC(insn->code) == BPF_K) { /* dreg K * Only dreg still needs precision before @@ -16377,6 +16379,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *eq_branch_regs; struct linked_regs linked_regs = {}; u8 opcode = BPF_OP(insn->code); + int insn_flags = 0; bool is_jmp32; int pred = -1; int err; @@ -16435,6 +16438,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, insn->src_reg); return -EACCES; } + + if (src_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_SRC_REG_STACK; } else { if (insn->src_reg != BPF_REG_0) { verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); @@ -16446,6 +16452,14 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, __mark_reg_known(src_reg, insn->imm); } + if (dst_reg->type == PTR_TO_STACK) + insn_flags |= INSN_F_DST_REG_STACK; + if (insn_flags) { + err = push_insn_history(env, this_branch, insn_flags, 0); + if (err) + return err; + } + is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 27f08aa17b56d9..63e5b90da1f303 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -90,7 +90,7 @@ DEFINE_MUTEX(cgroup_mutex); DEFINE_SPINLOCK(css_set_lock); -#ifdef CONFIG_PROVE_RCU +#if (defined CONFIG_PROVE_RCU || defined CONFIG_LOCKDEP) EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif @@ -2353,9 +2353,37 @@ static struct file_system_type cgroup2_fs_type = { }; #ifdef CONFIG_CPUSETS_V1 +enum cpuset_param { + Opt_cpuset_v2_mode, +}; + +static const struct fs_parameter_spec cpuset_fs_parameters[] = { + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + {} +}; + +static int cpuset_parse_param(struct 
fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, cpuset_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + return 0; + } + return -EINVAL; +} + static const struct fs_context_operations cpuset_fs_context_ops = { .get_tree = cgroup1_get_tree, .free = cgroup_fs_context_free, + .parse_param = cpuset_parse_param, }; /* @@ -2392,6 +2420,7 @@ static int cpuset_init_fs_context(struct fs_context *fc) static struct file_system_type cpuset_fs_type = { .name = "cpuset", .init_fs_context = cpuset_init_fs_context, + .parameters = cpuset_fs_parameters, .fs_flags = FS_USERNS_MOUNT, }; #endif @@ -5923,6 +5952,12 @@ static void kill_css(struct cgroup_subsys_state *css) if (css->flags & CSS_DYING) return; + /* + * Call css_killed(), if defined, before setting the CSS_DYING flag + */ + if (css->ss->css_killed) + css->ss->css_killed(css); + css->flags |= CSS_DYING; /* diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h index 976a8bc3ff6031..383963e28ac69c 100644 --- a/kernel/cgroup/cpuset-internal.h +++ b/kernel/cgroup/cpuset-internal.h @@ -33,6 +33,7 @@ enum prs_errcode { PERR_CPUSEMPTY, PERR_HKEEPING, PERR_ACCESS, + PERR_REMOTE, }; /* bits in struct cpuset flags field */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 39c1fc643d770d..24b70ea3e6ce97 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -61,10 +61,17 @@ static const char * const perr_strings[] = { [PERR_CPUSEMPTY] = "cpuset.cpus and cpuset.cpus.exclusive are empty", [PERR_HKEEPING] = "partition config conflicts with housekeeping setup", [PERR_ACCESS] = "Enable partition not permitted", + [PERR_REMOTE] = "Have remote partition underneath", }; /* - * Exclusive CPUs distributed out to sub-partitions of top_cpuset + * For local partitions, 
update to subpartitions_cpus & isolated_cpus is done + * in update_parent_effective_cpumask(). For remote partitions, it is done in + * the remote_partition_*() and remote_cpus_update() helpers. + */ +/* + * Exclusive CPUs distributed out to local or remote sub-partitions of + * top_cpuset */ static cpumask_var_t subpartitions_cpus; @@ -86,7 +93,6 @@ static struct list_head remote_children; * A flag to force sched domain rebuild at the end of an operation. * It can be set in * - update_partition_sd_lb() - * - remote_partition_check() * - update_cpumasks_hier() * - cpuset_update_flag() * - cpuset_hotplug_update_tasks() @@ -1089,9 +1095,14 @@ void cpuset_reset_sched_domains(void) * * Iterate through each task of @cs updating its cpus_allowed to the * effective cpuset's. As this function is called with cpuset_mutex held, - * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask() - * is used instead of effective_cpus to make sure all offline CPUs are also - * included as hotplug code won't update cpumasks for tasks in top_cpuset. + * cpuset membership stays stable. + * + * For top_cpuset, task_cpu_possible_mask() is used instead of effective_cpus + * to make sure all offline CPUs are also included as hotplug code won't + * update cpumasks for tasks in top_cpuset. + * + * As task_cpu_possible_mask() can be task dependent in arm64, we have to + * do cpu masking per task instead of doing it once for all. */ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) { @@ -1105,9 +1116,11 @@ void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) if (top_cs) { /* - * Percpu kthreads in top_cpuset are ignored + * PF_NO_SETAFFINITY tasks are ignored. + * All per cpu kthreads should have PF_NO_SETAFFINITY + * flag set, see kthread_set_per_cpu(). 
*/ - if (kthread_is_per_cpu(task)) + if (task->flags & PF_NO_SETAFFINITY) continue; cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus); } else { @@ -1151,7 +1164,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, * * Return: 0 if successful, an error code otherwise */ -static int update_partition_exclusive(struct cpuset *cs, int new_prs) +static int update_partition_exclusive_flag(struct cpuset *cs, int new_prs) { bool exclusive = (new_prs > PRS_MEMBER); @@ -1234,12 +1247,12 @@ static void reset_partition_data(struct cpuset *cs) } /* - * partition_xcpus_newstate - Exclusive CPUs state change + * isolated_cpus_update - Update the isolated_cpus mask * @old_prs: old partition_root_state * @new_prs: new partition_root_state * @xcpus: exclusive CPUs with state change */ -static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *xcpus) +static void isolated_cpus_update(int old_prs, int new_prs, struct cpumask *xcpus) { WARN_ON_ONCE(old_prs == new_prs); if (new_prs == PRS_ISOLATED) @@ -1273,8 +1286,8 @@ static bool partition_xcpus_add(int new_prs, struct cpuset *parent, isolcpus_updated = (new_prs != parent->partition_root_state); if (isolcpus_updated) - partition_xcpus_newstate(parent->partition_root_state, new_prs, - xcpus); + isolated_cpus_update(parent->partition_root_state, new_prs, + xcpus); cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); return isolcpus_updated; @@ -1304,8 +1317,8 @@ static bool partition_xcpus_del(int old_prs, struct cpuset *parent, isolcpus_updated = (old_prs != parent->partition_root_state); if (isolcpus_updated) - partition_xcpus_newstate(old_prs, parent->partition_root_state, - xcpus); + isolated_cpus_update(old_prs, parent->partition_root_state, + xcpus); cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); @@ -1340,20 +1353,57 @@ EXPORT_SYMBOL_GPL(cpuset_cpu_is_isolated); * 
compute_effective_exclusive_cpumask - compute effective exclusive CPUs * @cs: cpuset * @xcpus: effective exclusive CPUs value to be set - * Return: true if xcpus is not empty, false otherwise. + * @real_cs: the real cpuset (can be NULL) + * Return: 0 if there is no sibling conflict, > 0 otherwise * - * Starting with exclusive_cpus (cpus_allowed if exclusive_cpus is not set), - * it must be a subset of parent's effective_xcpus. + * If exclusive_cpus isn't explicitly set or a real_cs is provided, we have to + * scan the sibling cpusets and exclude their exclusive_cpus or effective_xcpus + * as well. The provision of real_cs means that a cpumask is being changed and + * the given cs is a trial one. */ -static bool compute_effective_exclusive_cpumask(struct cpuset *cs, - struct cpumask *xcpus) +static int compute_effective_exclusive_cpumask(struct cpuset *cs, + struct cpumask *xcpus, + struct cpuset *real_cs) { + struct cgroup_subsys_state *css; struct cpuset *parent = parent_cs(cs); + struct cpuset *sibling; + int retval = 0; if (!xcpus) xcpus = cs->effective_xcpus; - return cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); + cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); + + if (!real_cs) { + if (!cpumask_empty(cs->exclusive_cpus)) + return 0; + } else { + cs = real_cs; + } + + /* + * Exclude exclusive CPUs from siblings + */ + rcu_read_lock(); + cpuset_for_each_child(sibling, css, parent) { + if (sibling == cs) + continue; + + if (!cpumask_empty(sibling->exclusive_cpus) && + cpumask_intersects(xcpus, sibling->exclusive_cpus)) { + cpumask_andnot(xcpus, xcpus, sibling->exclusive_cpus); + retval++; + continue; + } + if (!cpumask_empty(sibling->effective_xcpus) && + cpumask_intersects(xcpus, sibling->effective_xcpus)) { + cpumask_andnot(xcpus, xcpus, sibling->effective_xcpus); + retval++; + } + } + rcu_read_unlock(); + return retval; } static inline bool is_remote_partition(struct cpuset *cs) @@ -1395,7 +1445,7 @@ static int 
remote_partition_enable(struct cpuset *cs, int new_prs, * remote partition root underneath it, its exclusive_cpus must * have overlapped with subpartitions_cpus. */ - compute_effective_exclusive_cpumask(cs, tmp->new_cpus); + compute_effective_exclusive_cpumask(cs, tmp->new_cpus, NULL); if (cpumask_empty(tmp->new_cpus) || cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) @@ -1404,8 +1454,11 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, spin_lock_irq(&callback_lock); isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children); + cpumask_copy(cs->effective_xcpus, tmp->new_cpus); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + cpuset_force_rebuild(); + cs->prs_err = 0; /* * Propagate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1428,20 +1481,24 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) { bool isolcpus_updated; - compute_effective_exclusive_cpumask(cs, tmp->new_cpus); WARN_ON_ONCE(!is_remote_partition(cs)); - WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); + WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); spin_lock_irq(&callback_lock); list_del_init(&cs->remote_sibling); isolcpus_updated = partition_xcpus_del(cs->partition_root_state, - NULL, tmp->new_cpus); - cs->partition_root_state = -cs->partition_root_state; - if (!cs->prs_err) - cs->prs_err = PERR_INVCPUS; + NULL, cs->effective_xcpus); + if (cs->prs_err) + cs->partition_root_state = -cs->partition_root_state; + else + cs->partition_root_state = PRS_MEMBER; + + /* effective_xcpus may need to be changed */ + compute_effective_exclusive_cpumask(cs, NULL, NULL); reset_partition_data(cs); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + cpuset_force_rebuild(); /* * Propagate changes in top_cpuset's 
effective_cpus down the hierarchy. @@ -1453,14 +1510,15 @@ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) /* * remote_cpus_update - cpus_exclusive change of remote partition * @cs: the cpuset to be updated - * @newmask: the new effective_xcpus mask + * @xcpus: the new exclusive_cpus mask, if non-NULL + * @excpus: the new effective_xcpus mask * @tmp: temporary masks * * top_cpuset and subpartitions_cpus will be updated or partition can be * invalidated. */ -static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, - struct tmpmasks *tmp) +static void remote_cpus_update(struct cpuset *cs, struct cpumask *xcpus, + struct cpumask *excpus, struct tmpmasks *tmp) { bool adding, deleting; int prs = cs->partition_root_state; @@ -1471,29 +1529,45 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); - if (cpumask_empty(newmask)) + if (cpumask_empty(excpus)) { + cs->prs_err = PERR_CPUSEMPTY; goto invalidate; + } - adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); - deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); + adding = cpumask_andnot(tmp->addmask, excpus, cs->effective_xcpus); + deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, excpus); /* * Additions of remote CPUs is only allowed if those CPUs are * not allocated to other partitions and there are effective_cpus * left in the top cpuset. 
*/ - if (adding && (!capable(CAP_SYS_ADMIN) || - cpumask_intersects(tmp->addmask, subpartitions_cpus) || - cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))) - goto invalidate; + if (adding) { + if (!capable(CAP_SYS_ADMIN)) + cs->prs_err = PERR_ACCESS; + else if (cpumask_intersects(tmp->addmask, subpartitions_cpus) || + cpumask_subset(top_cpuset.effective_cpus, tmp->addmask)) + cs->prs_err = PERR_NOCPUS; + if (cs->prs_err) + goto invalidate; + } spin_lock_irq(&callback_lock); if (adding) isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); if (deleting) isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); + /* + * Need to update effective_xcpus and exclusive_cpus now as + * update_sibling_cpumasks() below may iterate back to the same cs. + */ + cpumask_copy(cs->effective_xcpus, excpus); + if (xcpus) + cpumask_copy(cs->exclusive_cpus, xcpus); spin_unlock_irq(&callback_lock); update_unbound_workqueue_cpumask(isolcpus_updated); + if (adding || deleting) + cpuset_force_rebuild(); /* * Propagate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1506,47 +1580,6 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(cs, tmp); } -/* - * remote_partition_check - check if a child remote partition needs update - * @cs: the cpuset to be updated - * @newmask: the new effective_xcpus mask - * @delmask: temporary mask for deletion (not in tmp) - * @tmp: temporary masks - * - * This should be called before the given cs has updated its cpus_allowed - * and/or effective_xcpus. - */ -static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, - struct cpumask *delmask, struct tmpmasks *tmp) -{ - struct cpuset *child, *next; - int disable_cnt = 0; - - /* - * Compute the effective exclusive CPUs that will be deleted. 
- */ - if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) || - !cpumask_intersects(delmask, subpartitions_cpus)) - return; /* No deletion of exclusive CPUs in partitions */ - - /* - * Searching the remote children list to look for those that will - * be impacted by the deletion of exclusive CPUs. - * - * Since a cpuset must be removed from the remote children list - * before it can go offline and holding cpuset_mutex will prevent - * any change in cpuset status. RCU read lock isn't needed. - */ - lockdep_assert_held(&cpuset_mutex); - list_for_each_entry_safe(child, next, &remote_children, remote_sibling) - if (cpumask_intersects(child->effective_cpus, delmask)) { - remote_partition_disable(child, tmp); - disable_cnt++; - } - if (disable_cnt) - cpuset_force_rebuild(); -} - /* * prstate_housekeeping_conflict - check for partition & housekeeping conflicts * @prstate: partition root state to be checked @@ -1601,7 +1634,7 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus) * The partcmd_update command is used by update_cpumasks_hier() with newmask * NULL and update_cpumask() with newmask set. The partcmd_invalidate is used * by update_cpumask() with NULL newmask. In both cases, the callers won't - * check for error and so partition_root_state and prs_error will be updated + * check for error and so partition_root_state and prs_err will be updated * directly. */ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, @@ -1614,11 +1647,12 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? 
*/ int subparts_delta = 0; - struct cpumask *xcpus; /* cs effective_xcpus */ int isolcpus_updated = 0; + struct cpumask *xcpus = user_xcpus(cs); bool nocpu; lockdep_assert_held(&cpuset_mutex); + WARN_ON_ONCE(is_remote_partition(cs)); /* * new_prs will only be changed for the partcmd_update and @@ -1626,7 +1660,6 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, */ adding = deleting = false; old_prs = new_prs = cs->partition_root_state; - xcpus = user_xcpus(cs); if (cmd == partcmd_invalidate) { if (is_prs_invalid(old_prs)) @@ -1660,13 +1693,20 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, nocpu = tasks_nocpu_error(parent, cs, xcpus); if ((cmd == partcmd_enable) || (cmd == partcmd_enablei)) { + /* + * Need to call compute_effective_exclusive_cpumask() in case + * exclusive_cpus not set. Sibling conflict should only happen + * if exclusive_cpus isn't set. + */ + xcpus = tmp->new_cpus; + if (compute_effective_exclusive_cpumask(cs, xcpus, NULL)) + WARN_ON_ONCE(!cpumask_empty(cs->exclusive_cpus)); + /* * Enabling partition root is not allowed if its - * effective_xcpus is empty or doesn't overlap with - * parent's effective_xcpus. + * effective_xcpus is empty. */ - if (cpumask_empty(xcpus) || - !cpumask_intersects(xcpus, parent->effective_xcpus)) + if (cpumask_empty(xcpus)) return PERR_INVCPUS; if (prstate_housekeeping_conflict(new_prs, xcpus)) @@ -1679,19 +1719,22 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, if (nocpu) return PERR_NOCPUS; - cpumask_copy(tmp->delmask, xcpus); - deleting = true; - subparts_delta++; + deleting = cpumask_and(tmp->delmask, xcpus, parent->effective_xcpus); + if (deleting) + subparts_delta++; new_prs = (cmd == partcmd_enable) ? PRS_ROOT : PRS_ISOLATED; } else if (cmd == partcmd_disable) { /* - * May need to add cpus to parent's effective_cpus for - * valid partition root. 
+ * May need to add cpus back to parent's effective_cpus + * (and maybe removed from subpartitions_cpus/isolated_cpus) + * for valid partition root. xcpus may contain CPUs that + * shouldn't be removed from the two global cpumasks. */ - adding = !is_prs_invalid(old_prs) && - cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); - if (adding) + if (is_partition_valid(cs)) { + cpumask_copy(tmp->addmask, cs->effective_xcpus); + adding = true; subparts_delta--; + } new_prs = PRS_MEMBER; } else if (newmask) { /* @@ -1701,6 +1744,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ nocpu |= tasks_nocpu_error(parent, cs, newmask); @@ -1829,7 +1873,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, * CPU lists in cs haven't been updated yet. So defer it to later. */ if ((old_prs != new_prs) && (cmd != partcmd_update)) { - int err = update_partition_exclusive(cs, new_prs); + int err = update_partition_exclusive_flag(cs, new_prs); if (err) return err; @@ -1867,7 +1911,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, update_unbound_workqueue_cpumask(isolcpus_updated); if ((old_prs != new_prs) && (cmd == partcmd_update)) - update_partition_exclusive(cs, new_prs); + update_partition_exclusive_flag(cs, new_prs); if (adding || deleting) { cpuset_update_tasks_cpumask(parent, tmp->addmask); @@ -1917,7 +1961,7 @@ static void compute_partition_effective_cpumask(struct cpuset *cs, * 2) All the effective_cpus will be used up and cp * has tasks */ - compute_effective_exclusive_cpumask(cs, new_ecpus); + compute_effective_exclusive_cpumask(cs, new_ecpus, NULL); cpumask_and(new_ecpus, new_ecpus, cpu_active_mask); rcu_read_lock(); @@ -1925,6 +1969,11 @@ static void compute_partition_effective_cpumask(struct cpuset *cs, if (!is_partition_valid(child)) continue; + /* + * There shouldn't be a 
remote partition underneath another + * partition root. + */ + WARN_ON_ONCE(is_remote_partition(child)); child->prs_err = 0; if (!cpumask_subset(child->effective_xcpus, cs->effective_xcpus)) @@ -1980,32 +2029,39 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, bool remote = is_remote_partition(cp); bool update_parent = false; + old_prs = new_prs = cp->partition_root_state; + /* - * Skip descendent remote partition that acquires CPUs - * directly from top cpuset unless it is cs. + * For child remote partition root (!= cs), we need to call + * remote_cpus_update() if effective_xcpus will be changed. + * Otherwise, we can skip the whole subtree. + * + * remote_cpus_update() will reuse tmp->new_cpus only after + * its value is being processed. */ if (remote && (cp != cs)) { - pos_css = css_rightmost_descendant(pos_css); - continue; - } + compute_effective_exclusive_cpumask(cp, tmp->new_cpus, NULL); + if (cpumask_equal(cp->effective_xcpus, tmp->new_cpus)) { + pos_css = css_rightmost_descendant(pos_css); + continue; + } + rcu_read_unlock(); + remote_cpus_update(cp, NULL, tmp->new_cpus, tmp); + rcu_read_lock(); - /* - * Update effective_xcpus if exclusive_cpus set. - * The case when exclusive_cpus isn't set is handled later. 
- */ - if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) { - spin_lock_irq(&callback_lock); - compute_effective_exclusive_cpumask(cp, NULL); - spin_unlock_irq(&callback_lock); + /* Remote partition may be invalidated */ + new_prs = cp->partition_root_state; + remote = (new_prs == old_prs); } - old_prs = new_prs = cp->partition_root_state; - if (remote || (is_partition_valid(parent) && - is_partition_valid(cp))) + if (remote || (is_partition_valid(parent) && is_partition_valid(cp))) compute_partition_effective_cpumask(cp, tmp->new_cpus); else compute_effective_cpumask(tmp->new_cpus, cp, parent); + if (remote) + goto get_css; /* Ready to update cpuset data */ + /* * A partition with no effective_cpus is allowed as long as * there is no task associated with it. Call @@ -2025,9 +2081,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) cpumask_copy(tmp->new_cpus, parent->effective_cpus); - if (remote) - goto get_css; - /* * Skip the whole subtree if * 1) the cpumask remains the same, @@ -2088,6 +2141,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, tmp->new_cpus); cp->partition_root_state = new_prs; + if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) + compute_effective_exclusive_cpumask(cp, NULL, NULL); + /* * Make sure effective_xcpus is properly set for a valid * partition root. @@ -2174,7 +2230,14 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, parent); if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) continue; + } else if (is_remote_partition(sibling)) { + /* + * Change in a sibling cpuset won't affect a remote + * partition root. 
+ */ + continue; } + if (!css_tryget_online(&sibling->css)) continue; @@ -2231,8 +2294,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * trialcs->effective_xcpus is used as a temporary cpumask * for checking validity of the partition root. */ + trialcs->partition_root_state = PRS_MEMBER; if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) - compute_effective_exclusive_cpumask(trialcs, NULL); + compute_effective_exclusive_cpumask(trialcs, NULL, cs); } /* Nothing to do if the cpus didn't change */ @@ -2305,19 +2369,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Call remote_cpus_update() to handle valid remote partition */ if (is_remote_partition(cs)) - remote_cpus_update(cs, xcpus, &tmp); + remote_cpus_update(cs, NULL, xcpus, &tmp); else if (invalidate) update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); else update_parent_effective_cpumask(cs, partcmd_update, xcpus, &tmp); - } else if (!cpumask_empty(cs->exclusive_cpus)) { - /* - * Use trialcs->effective_cpus as a temp cpumask - */ - remote_partition_check(cs, trialcs->effective_xcpus, - trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); @@ -2369,8 +2427,15 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) return 0; - if (*buf) - compute_effective_exclusive_cpumask(trialcs, NULL); + if (*buf) { + trialcs->partition_root_state = PRS_MEMBER; + /* + * Reject the change if there is exclusive CPUs conflict with + * the siblings. 
+ */ + if (compute_effective_exclusive_cpumask(trialcs, NULL, cs)) + return -EINVAL; + } /* * Check all the descendants in update_cpumasks_hier() if @@ -2401,8 +2466,8 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (invalidate) remote_partition_disable(cs, &tmp); else - remote_cpus_update(cs, trialcs->effective_xcpus, - &tmp); + remote_cpus_update(cs, trialcs->exclusive_cpus, + trialcs->effective_xcpus, &tmp); } else if (invalidate) { update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, &tmp); @@ -2410,12 +2475,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, update_parent_effective_cpumask(cs, partcmd_update, trialcs->effective_xcpus, &tmp); } - } else if (!cpumask_empty(trialcs->exclusive_cpus)) { - /* - * Use trialcs->effective_cpus as a temp cpumask - */ - remote_partition_check(cs, trialcs->effective_xcpus, - trialcs->effective_cpus, &tmp); } spin_lock_irq(&callback_lock); cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); @@ -2782,7 +2841,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) int err = PERR_NONE, old_prs = cs->partition_root_state; struct cpuset *parent = parent_cs(cs); struct tmpmasks tmpmask; - bool new_xcpus_state = false; + bool isolcpus_updated = false; if (old_prs == new_prs) return 0; @@ -2796,18 +2855,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) if (alloc_cpumasks(NULL, &tmpmask)) return -ENOMEM; - /* - * Setup effective_xcpus if not properly set yet, it will be cleared - * later if partition becomes invalid. 
- */ - if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) { - spin_lock_irq(&callback_lock); - cpumask_and(cs->effective_xcpus, - cs->cpus_allowed, parent->effective_xcpus); - spin_unlock_irq(&callback_lock); - } - - err = update_partition_exclusive(cs, new_prs); + err = update_partition_exclusive_flag(cs, new_prs); if (err) goto out; @@ -2820,6 +2868,19 @@ static int update_prstate(struct cpuset *cs, int new_prs) goto out; } + /* + * We don't support the creation of a new local partition with + * a remote partition underneath it. This unsupported + * setting can happen only if parent is the top_cpuset because + * a remote partition cannot be created underneath an existing + * local or remote partition. + */ + if ((parent == &top_cpuset) && + cpumask_intersects(cs->exclusive_cpus, subpartitions_cpus)) { + err = PERR_REMOTE; + goto out; + } + /* * If parent is valid partition, enable local partiion. * Otherwise, enable a remote partition. @@ -2835,8 +2896,9 @@ static int update_prstate(struct cpuset *cs, int new_prs) } else if (old_prs && new_prs) { /* * A change in load balance state only, no change in cpumasks. + * Need to update isolated_cpus. 
*/ - new_xcpus_state = true; + isolcpus_updated = true; } else { /* * Switching back to member is always allowed even if it @@ -2860,7 +2922,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) */ if (err) { new_prs = -new_prs; - update_partition_exclusive(cs, new_prs); + update_partition_exclusive_flag(cs, new_prs); } spin_lock_irq(&callback_lock); @@ -2868,14 +2930,18 @@ static int update_prstate(struct cpuset *cs, int new_prs) WRITE_ONCE(cs->prs_err, err); if (!is_partition_valid(cs)) reset_partition_data(cs); - else if (new_xcpus_state) - partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); + else if (isolcpus_updated) + isolated_cpus_update(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); - update_unbound_workqueue_cpumask(new_xcpus_state); + update_unbound_workqueue_cpumask(isolcpus_updated); - /* Force update if switching back to member */ + /* Force update if switching back to member & update effective_xcpus */ update_cpumasks_hier(cs, &tmpmask, !new_prs); + /* A newly created partition must have effective_xcpus set */ + WARN_ON_ONCE(!old_prs && (new_prs > 0) + && cpumask_empty(cs->effective_xcpus)); + /* Update sched domains and load balance flag */ update_partition_sd_lb(cs, old_prs); @@ -3208,7 +3274,7 @@ int cpuset_common_seq_show(struct seq_file *sf, void *v) return ret; } -static int sched_partition_show(struct seq_file *seq, void *v) +static int cpuset_partition_show(struct seq_file *seq, void *v) { struct cpuset *cs = css_cs(seq_css(seq)); const char *err, *type = NULL; @@ -3239,7 +3305,7 @@ static int sched_partition_show(struct seq_file *seq, void *v) return 0; } -static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, +static ssize_t cpuset_partition_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cpuset *cs = css_cs(of_css(of)); @@ -3260,11 +3326,8 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); 
cpus_read_lock(); mutex_lock(&cpuset_mutex); - if (!is_cpuset_online(cs)) - goto out_unlock; - - retval = update_prstate(cs, val); -out_unlock: + if (is_cpuset_online(cs)) + retval = update_prstate(cs, val); mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); @@ -3308,8 +3371,8 @@ static struct cftype dfl_files[] = { { .name = "cpus.partition", - .seq_show = sched_partition_show, - .write = sched_partition_write, + .seq_show = cpuset_partition_show, + .write = cpuset_partition_write, .private = FILE_PARTITION_ROOT, .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct cpuset, partition_file), @@ -3475,9 +3538,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpus_read_lock(); mutex_lock(&cpuset_mutex); - if (is_partition_valid(cs)) - update_prstate(cs, 0); - if (!cpuset_v2() && is_sched_load_balance(cs)) cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); @@ -3488,6 +3548,22 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpus_read_unlock(); } +static void cpuset_css_killed(struct cgroup_subsys_state *css) +{ + struct cpuset *cs = css_cs(css); + + cpus_read_lock(); + mutex_lock(&cpuset_mutex); + + /* Reset valid partition back to member */ + if (is_partition_valid(cs)) + update_prstate(cs, PRS_MEMBER); + + mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); + +} + static void cpuset_css_free(struct cgroup_subsys_state *css) { struct cpuset *cs = css_cs(css); @@ -3609,6 +3685,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, .css_offline = cpuset_css_offline, + .css_killed = cpuset_css_killed, .css_free = cpuset_css_free, .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, @@ -3739,10 +3816,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) if (remote && cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)) { + cs->prs_err = PERR_HOTPLUG; remote_partition_disable(cs, tmp); 
compute_effective_cpumask(&new_cpus, cs, parent); remote = false; - cpuset_force_rebuild(); } /* diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 4bb587d5d34f97..b2239156b7def9 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -318,10 +318,11 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp) might_sleep(); for_each_possible_cpu(cpu) { - struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu); + struct cgroup *pos; /* Reacquire for each CPU to avoid disabling IRQs too long */ __cgroup_rstat_lock(cgrp, cpu); + pos = cgroup_rstat_updated_list(cgrp, cpu); for (; pos; pos = pos->rstat_flush_next) { struct cgroup_subsys_state *css; diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 3b2bdca9f1d4b0..77c8d9487a9ab1 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -336,16 +336,22 @@ static phys_addr_t dma_reserved_default_memory_size __initdata; static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { - if (!rmem->priv) { - struct dma_coherent_mem *mem; + struct dma_coherent_mem *mem = rmem->priv; + if (!mem) { mem = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, true); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; } - dma_assign_coherent_memory(dev, rmem->priv); + + /* Warn if the device potentially can't use the reserved memory */ + if (mem->device_base + rmem->size - 1 > + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit)) + dev_warn(dev, "reserved memory is beyond device's set DMA address range\n"); + + dma_assign_coherent_memory(dev, mem); return 0; } diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 055da410ac71d6..8df0dfaaca18ee 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -64,8 +64,7 @@ struct cma *dma_contiguous_default_area; * Users, who want to set the size of global CMA area for their system * should use cma= kernel parameter. 
*/ -static const phys_addr_t size_bytes __initconst = - (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M; +#define size_bytes ((phys_addr_t)CMA_SIZE_MBYTES * SZ_1M) static phys_addr_t size_cmdline __initdata = -1; static phys_addr_t base_cmdline __initdata; static phys_addr_t limit_cmdline __initdata; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index cda127027e48a7..051a32988040ff 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -910,6 +910,19 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); +static bool __dma_addressing_limited(struct device *dev) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < + dma_get_required_mask(dev)) + return true; + + if (unlikely(ops) || use_dma_iommu(dev)) + return false; + return !dma_direct_all_ram_mapped(dev); +} + /** * dma_addressing_limited - return if the device is addressing limited * @dev: device to check @@ -920,15 +933,11 @@ EXPORT_SYMBOL(dma_set_coherent_mask); */ bool dma_addressing_limited(struct device *dev) { - const struct dma_map_ops *ops = get_dma_ops(dev); - - if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) < - dma_get_required_mask(dev)) - return true; - - if (unlikely(ops) || use_dma_iommu(dev)) + if (!__dma_addressing_limited(dev)) return false; - return !dma_direct_all_ram_mapped(dev); + + dev_dbg(dev, "device is DMA addressing limited\n"); + return true; } EXPORT_SYMBOL_GPL(dma_addressing_limited); diff --git a/kernel/events/core.c b/kernel/events/core.c index 128db74e9eab8c..e97bc9220fd1a8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3943,7 +3943,7 @@ static int merge_sched_in(struct perf_event *event, void *data) perf_event_set_state(event, PERF_EVENT_STATE_ERROR); if (*perf_event_fasync(event)) - event->pending_kill = POLL_HUP; + event->pending_kill = POLL_ERR; perf_event_wakeup(event); } else { @@ -5518,30 +5518,6 @@ static bool 
exclusive_event_installable(struct perf_event *event, static void perf_free_addr_filters(struct perf_event *event); -static void perf_pending_task_sync(struct perf_event *event) -{ - struct callback_head *head = &event->pending_task; - - if (!event->pending_work) - return; - /* - * If the task is queued to the current task's queue, we - * obviously can't wait for it to complete. Simply cancel it. - */ - if (task_work_cancel(current, head)) { - event->pending_work = 0; - local_dec(&event->ctx->nr_no_switch_fast); - return; - } - - /* - * All accesses related to the event are within the same RCU section in - * perf_pending_task(). The RCU grace period before the event is freed - * will make sure all those accesses are complete by then. - */ - rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE); -} - /* vs perf_event_alloc() error */ static void __free_event(struct perf_event *event) { @@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); irq_work_sync(&event->pending_disable_irq); - perf_pending_task_sync(event); unaccount_event(event); @@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event) static void put_event(struct perf_event *event) { + struct perf_event *parent; + if (!atomic_long_dec_and_test(&event->refcount)) return; + parent = event->parent; _free_event(event); + + /* Matches the refcount bump in inherit_event() */ + if (parent) + put_event(parent); } /* @@ -5779,11 +5761,6 @@ int perf_event_release_kernel(struct perf_event *event) if (tmp == child) { perf_remove_from_context(child, DETACH_GROUP); list_move(&child->child_list, &free_list); - /* - * This matches the refcount bump in inherit_event(); - * this can't be the last reference. 
- */ - put_event(event); } else { var = &ctx->refcount; } @@ -5809,7 +5786,8 @@ int perf_event_release_kernel(struct perf_event *event) void *var = &child->ctx->refcount; list_del(&child->child_list); - free_event(child); + /* Last reference unless ->pending_task work is pending */ + put_event(child); /* * Wake any perf_event_free_task() waiting for this event to be @@ -5820,7 +5798,11 @@ int perf_event_release_kernel(struct perf_event *event) } no_ctx: - put_event(event); /* Must be the 'last' reference */ + /* + * Last reference unless ->pending_task work is pending on this event + * or any of its children. + */ + put_event(event); return 0; } EXPORT_SYMBOL_GPL(perf_event_release_kernel); @@ -6093,7 +6075,7 @@ static __poll_t perf_poll(struct file *file, poll_table *wait) if (unlikely(READ_ONCE(event->state) == PERF_EVENT_STATE_ERROR && event->attr.pinned)) - return events; + return EPOLLERR; /* * Pin the event->rb by taking event->mmap_mutex; otherwise @@ -6257,6 +6239,9 @@ static int perf_event_set_output(struct perf_event *event, static int perf_event_set_filter(struct perf_event *event, void __user *arg); static int perf_copy_attr(struct perf_event_attr __user *uattr, struct perf_event_attr *attr); +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie); static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) { @@ -6319,7 +6304,7 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon if (IS_ERR(prog)) return PTR_ERR(prog); - err = perf_event_set_bpf_prog(event, prog, 0); + err = __perf_event_set_bpf_prog(event, prog, 0); if (err) { bpf_prog_put(prog); return err; @@ -7235,12 +7220,6 @@ static void perf_pending_task(struct callback_head *head) struct perf_event *event = container_of(head, struct perf_event, pending_task); int rctx; - /* - * All accesses to the event must belong to the same implicit RCU read-side - * critical section as the 
->pending_work reset. See comment in - * perf_pending_task_sync(). - */ - rcu_read_lock(); /* * If we 'fail' here, that's OK, it means recursion is already disabled * and we won't recurse 'further'. @@ -7251,9 +7230,8 @@ static void perf_pending_task(struct callback_head *head) event->pending_work = 0; perf_sigtrap(event); local_dec(&event->ctx->nr_no_switch_fast); - rcuwait_wake_up(&event->pending_work_wait); } - rcu_read_unlock(); + put_event(event); if (rctx >= 0) perf_swevent_put_recursion_context(rctx); @@ -10054,14 +10032,14 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle) hwc->interrupts = 1; } else { hwc->interrupts++; - if (unlikely(throttle && - hwc->interrupts > max_samples_per_tick)) { - __this_cpu_inc(perf_throttled_count); - tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(event, 0); - ret = 1; - } + } + + if (unlikely(throttle && hwc->interrupts >= max_samples_per_tick)) { + __this_cpu_inc(perf_throttled_count); + tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); + hwc->interrupts = MAX_INTERRUPTS; + perf_log_throttle(event, 0); + ret = 1; } if (event->attr.freq) { @@ -10248,6 +10226,7 @@ static int __perf_event_overflow(struct perf_event *event, !task_work_add(current, &event->pending_task, notify_mode)) { event->pending_work = pending_id; local_inc(&event->ctx->nr_no_switch_fast); + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); event->pending_addr = 0; if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) @@ -11093,8 +11072,9 @@ static inline bool perf_event_is_tracing(struct perf_event *event) return false; } -int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, - u64 bpf_cookie) +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) { bool is_kprobe, is_uprobe, is_tracepoint, is_syscall_tp; @@ -11132,6 +11112,20 @@ int 
perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, return perf_event_attach_bpf_prog(event, prog, bpf_cookie); } +int perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + struct perf_event_context *ctx; + int ret; + + ctx = perf_event_ctx_lock(event); + ret = __perf_event_set_bpf_prog(event, prog, bpf_cookie); + perf_event_ctx_unlock(event, ctx); + + return ret; +} + void perf_event_free_bpf_prog(struct perf_event *event) { if (!event->prog) @@ -11154,7 +11148,15 @@ static void perf_event_free_filter(struct perf_event *event) { } -int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, +static int __perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, + u64 bpf_cookie) +{ + return -ENOENT; +} + +int perf_event_set_bpf_prog(struct perf_event *event, + struct bpf_prog *prog, u64 bpf_cookie) { return -ENOENT; @@ -12610,7 +12612,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, init_irq_work(&event->pending_irq, perf_pending_irq); event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable); init_task_work(&event->pending_task, perf_pending_task); - rcuwait_init(&event->pending_work_wait); mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); @@ -13747,8 +13748,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) * Kick perf_poll() for is_event_hup(); */ perf_event_wakeup(parent_event); - free_event(event); - put_event(parent_event); + put_event(event); return; } @@ -13872,13 +13872,11 @@ static void perf_free_event(struct perf_event *event, list_del_init(&event->child_list); mutex_unlock(&parent->child_mutex); - put_event(parent); - raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); - free_event(event); + put_event(event); } /* @@ -14016,6 +14014,9 @@ inherit_event(struct perf_event *parent_event, if 
(IS_ERR(child_event)) return child_event; + get_ctx(child_ctx); + child_event->ctx = child_ctx; + pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); @@ -14037,8 +14038,6 @@ inherit_event(struct perf_event *parent_event, return NULL; } - get_ctx(child_ctx); - /* * Make the child state follow the state of the parent event, * not its attr.disabled bit. We hold the parent's mutex, @@ -14059,7 +14058,6 @@ inherit_event(struct perf_event *parent_event, local64_set(&hwc->period_left, sample_period); } - child_event->ctx = child_ctx; child_event->overflow_handler = parent_event->overflow_handler; child_event->overflow_handler_context = parent_event->overflow_handler_context; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 615b4e6d22c7b1..8d783b5882b6a3 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1956,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask, * to-be-reused return instances for future uretprobes. If ri_timer() * happens to be running right now, though, we fallback to safety and * just perform RCU-delated freeing of ri. + * Admittedly, this is a rather simple use of seqcount, but it nicely + * abstracts away all the necessary memory barriers, so we use + * a well-supported kernel primitive here. */ if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) { /* immediate reuse of ri without RCU GP is OK */ @@ -2016,12 +2019,20 @@ static void ri_timer(struct timer_list *timer) /* RCU protects return_instance from freeing. */ guard(rcu)(); - write_seqcount_begin(&utask->ri_seqcount); + /* + * See free_ret_instance() for notes on seqcount use. + * We also employ raw API variants to avoid lockdep false-positive + * warning complaining about enabled preemption. The timer can only be + * invoked once for a uprobe_task. Therefore there can only be one + * writer. 
The reader does not require an even sequence count to make + * progress, so it is OK to remain preemptible on PREEMPT_RT. + */ + raw_write_seqcount_begin(&utask->ri_seqcount); for_each_ret_instance_rcu(ri, utask->return_instances) hprobe_expire(&ri->hprobe, false); - write_seqcount_end(&utask->ri_seqcount); + raw_write_seqcount_end(&utask->ri_seqcount); } static struct uprobe_task *alloc_utask(void) diff --git a/kernel/fork.c b/kernel/fork.c index c4b26cd8998b8e..39e47bfaea5894 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + #include #include #include @@ -498,10 +502,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) vma_numab_state_init(new); dup_anon_vma_name(orig, new); - /* track_pfn_copy() will later take care of copying internal state. */ - if (unlikely(new->vm_flags & VM_PFNMAP)) - untrack_pfn_clear(new); - return new; } @@ -672,6 +672,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; + + /* track_pfn_copy() will later take care of copying internal state. */ + if (unlikely(tmp->vm_flags & VM_PFNMAP)) + untrack_pfn_clear(tmp); + retval = vma_dup_policy(mpnt, tmp); if (retval) goto fail_nomem_policy; @@ -2193,6 +2198,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. 
@@ -3353,6 +3362,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 5c8d43cdb0a318..c05ba7ca00faad 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -761,7 +761,7 @@ static int msi_domain_translate(struct irq_domain *domain, struct irq_fwspec *fw static void msi_domain_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - struct msi_desc *desc = irq_data_get_msi_desc(irqd); + struct msi_desc *desc = irqd ? irq_data_get_msi_desc(irqd) : NULL; if (!desc) return; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2ddb827e3bea03..464049c4af3f4c 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -747,6 +747,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -783,7 +784,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index d7762ef5949a2c..39278737bb68fd 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -192,6 +192,11 @@ config GENDWARFKSYMS depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT # Requires ELF object files. depends on !LTO + # To avoid conflicts with the discarded __gendwarfksyms_ptr symbols on + # X86, requires pahole before commit 47dcb534e253 ("btf_encoder: Stop + # indexing symbols for VARs") or after commit 9810758003ce ("btf_encoder: + # Verify 0 address DWARF variables are in ELF section"). 
+ depends on !X86 || !DEBUG_INFO_BTF || PAHOLE_VERSION < 128 || PAHOLE_VERSION > 129 help Calculate symbol versions from DWARF debugging information using gendwarfksyms. Requires DEBUG_INFO to be enabled. diff --git a/kernel/module/main.c b/kernel/module/main.c index a2859dc3eea66e..5c6ab20240a6d6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2829,6 +2829,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); + codetag_free_module_sections(mod); free_mod_mem(mod); } diff --git a/kernel/padata.c b/kernel/padata.c index b3d4eacc4f5d8c..7eee94166357a0 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -358,7 +358,8 @@ static void padata_reorder(struct parallel_data *pd) * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish. */ padata_get_pd(pd); - queue_work(pinst->serial_wq, &pd->reorder_work); + if (!queue_work(pinst->serial_wq, &pd->reorder_work)) + padata_put_pd(pd); } } diff --git a/kernel/params.c b/kernel/params.c index 2509f216c9f3cf..b92d64161b758d 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -760,38 +760,35 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static struct module_kobject * __init locate_module_kobject(const char *name) +struct module_kobject __modinit * lookup_or_create_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; int err; kobj = kset_find_obj(module_kset, name); - if (kobj) { - mk = to_module_kobject(kobj); - } else { - mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); - BUG_ON(!mk); - - mk->mod = THIS_MODULE; - mk->kobj.kset = module_kset; - err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, - "%s", name); -#ifdef CONFIG_MODULES - if (!err) - err = sysfs_create_file(&mk->kobj, &module_uevent.attr); -#endif - if (err) { - kobject_put(&mk->kobj); - pr_crit("Adding module '%s' to sysfs failed (%d), 
the system may be unstable.\n", - name, err); - return NULL; - } + if (kobj) + return to_module_kobject(kobj); - /* So that we hold reference in both cases. */ - kobject_get(&mk->kobj); + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + if (!mk) + return NULL; + + mk->mod = THIS_MODULE; + mk->kobj.kset = module_kset; + err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name); + if (IS_ENABLED(CONFIG_MODULES) && !err) + err = sysfs_create_file(&mk->kobj, &module_uevent.attr); + if (err) { + kobject_put(&mk->kobj); + pr_crit("Adding module '%s' to sysfs failed (%d), the system may be unstable.\n", + name, err); + return NULL; } + /* So that we hold reference in both cases. */ + kobject_get(&mk->kobj); + return mk; } @@ -802,7 +799,7 @@ static void __init kernel_add_sysfs_param(const char *name, struct module_kobject *mk; int err; - mk = locate_module_kobject(name); + mk = lookup_or_create_module_kobject(name); if (!mk) return; @@ -873,7 +870,7 @@ static void __init version_sysfs_builtin(void) int err; for (vattr = __start___modver; vattr < __stop___modver; vattr++) { - mk = locate_module_kobject(vattr->module_name); + mk = lookup_or_create_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); WARN_ON_ONCE(err); @@ -946,7 +943,9 @@ struct kset *module_kset; static void module_kobj_release(struct kobject *kobj) { struct module_kobject *mk = to_module_kobject(kobj); - complete(mk->kobj_completion); + + if (mk->kobj_completion) + complete(mk->kobj_completion); } const struct kobj_type module_ktype = { diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index d9b7e2b38c7a9f..41606247c27763 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -233,6 +233,10 @@ static int em_compute_costs(struct device *dev, struct em_perf_state *table, unsigned long prev_cost = ULONG_MAX; int i, ret; + /* This is needed only for CPUs and EAS skip other devices */ + if 
(!_is_cpu_device(dev)) + return 0; + /* Compute the cost of each performance state. */ for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res, cost; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 23c0f4e6cb2ffe..5af9c7ee98cd4a 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -90,6 +90,11 @@ void hibernate_release(void) atomic_inc(&hibernate_atomic); } +bool hibernation_in_progress(void) +{ + return !atomic_read(&hibernate_atomic); +} + bool hibernation_available(void) { return nohibernate == 0 && diff --git a/kernel/power/main.c b/kernel/power/main.c index 6254814d481714..0622e7dacf1720 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -613,7 +613,8 @@ bool pm_debug_messages_on __read_mostly; bool pm_debug_messages_should_print(void) { - return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON; + return pm_debug_messages_on && (hibernation_in_progress() || + pm_suspend_target_state != PM_SUSPEND_ON); } EXPORT_SYMBOL_GPL(pm_debug_messages_should_print); diff --git a/kernel/power/power.h b/kernel/power/power.h index c352dea2f67b56..f8496f40b54fa5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -71,10 +71,14 @@ extern void enable_restore_image_protection(void); static inline void enable_restore_image_protection(void) {} #endif /* CONFIG_STRICT_KERNEL_RWX */ +extern bool hibernation_in_progress(void); + #else /* !CONFIG_HIBERNATION */ static inline void hibernate_reserved_size_init(void) {} static inline void hibernate_image_size_init(void) {} + +static inline bool hibernation_in_progress(void) { return false; } #endif /* !CONFIG_HIBERNATION */ #define power_attr(_name) \ diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 52571dcad768b9..4e941999a53ba6 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -49,6 +49,9 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) len += sysfs_emit_at(buf, len, "%s ", wl->name); } + if (len > 0) 
+ --len; + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 659f83e7104869..80b10893b5038d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -801,6 +801,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp) return 0; } +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif + /* * Returns positive if the specified CPU has passed through a quiescent state * by virtue of being in or having passed through an dynticks idle state since @@ -936,9 +940,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp) rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); - rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); - rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); - rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); + rsrp->nr_csw = nr_context_switches_cpu(cpu); rsrp->jiffies = jiffies; rsrp->gp_seq = rdp->gp_seq; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a9a811d9d7a372..1bba2225e7448b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -168,7 +168,7 @@ struct rcu_snap_record { u64 cputime_irq; /* Accumulated cputime of hard irqs */ u64 cputime_softirq;/* Accumulated cputime of soft irqs */ u64 cputime_system; /* Accumulated cputime of kernel tasks */ - unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ + u64 nr_hardirqs; /* Accumulated number of hard irqs */ unsigned int nr_softirqs; /* Accumulated number of soft irqs */ unsigned long long nr_csw; /* Accumulated number of task switches */ unsigned long jiffies; /* Track jiffies value */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 925fcdad5dea22..56b21219442b65 100644 --- 
a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu) rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); pr_err("\t hardirqs softirqs csw/system\n"); - pr_err("\t number: %8ld %10d %12lld\n", - kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, + pr_err("\t number: %8lld %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs, kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, nr_context_switches_cpu(cpu) - rsrp->nr_csw); pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c81cf642dba055..d593d6612ba07e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2283,6 +2283,12 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * just go back and repeat. */ rq = task_rq_lock(p, &rf); + /* + * If task is sched_delayed, force dequeue it, to avoid always + * hitting the tick timeout in the queued case + */ + if (p->se.sched_delayed) + dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED); trace_sched_wait_task(p); running = task_on_cpu(rq, p); queued = task_on_rq_queued(p); @@ -6571,12 +6577,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * Otherwise marks the task's __state as RUNNING */ static bool try_to_block_task(struct rq *rq, struct task_struct *p, - unsigned long task_state) + unsigned long *task_state_p) { + unsigned long task_state = *task_state_p; int flags = DEQUEUE_NOCLOCK; if (signal_pending_state(task_state, p)) { WRITE_ONCE(p->__state, TASK_RUNNING); + *task_state_p = TASK_RUNNING; return false; } @@ -6713,7 +6721,7 @@ static void __sched notrace __schedule(int sched_mode) goto picked; } } else if (!preempt && prev_state) { - try_to_block_task(rq, prev, prev_state); + try_to_block_task(rq, prev, &prev_state); switch_count = &prev->nvcsw; } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 
1a19d69b91ed3c..816f07f9d30f1a 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -81,9 +81,23 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) if (!cpufreq_this_cpu_can_update(sg_policy->policy)) return false; - if (unlikely(sg_policy->limits_changed)) { - sg_policy->limits_changed = false; - sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + if (unlikely(READ_ONCE(sg_policy->limits_changed))) { + WRITE_ONCE(sg_policy->limits_changed, false); + sg_policy->need_freq_update = true; + + /* + * The above limits_changed update must occur before the reads + * of policy limits in cpufreq_driver_resolve_freq() or a policy + * limits update might be missed, so use a memory barrier to + * ensure it. + * + * This pairs with the write memory barrier in sugov_limits(). + */ + smp_mb(); + + return true; + } else if (sg_policy->need_freq_update) { + /* ignore_dl_rate_limit() wants a new frequency to be found. */ return true; } @@ -95,10 +109,22 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { - if (sg_policy->need_freq_update) + if (sg_policy->need_freq_update) { sg_policy->need_freq_update = false; - else if (sg_policy->next_freq == next_freq) + /* + * The policy limits have changed, but if the return value of + * cpufreq_driver_resolve_freq() after applying the new limits + * is still equal to the previously selected frequency, the + * driver callback need not be invoked unless the driver + * specifically wants that to happen on every update of the + * policy limits. 
+ */ + if (sg_policy->next_freq == next_freq && + !cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS)) + return false; + } else if (sg_policy->next_freq == next_freq) { return false; + } sg_policy->next_freq = next_freq; sg_policy->last_freq_update_time = time; @@ -365,7 +391,7 @@ static inline bool sugov_hold_freq(struct sugov_cpu *sg_cpu) { return false; } static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) { if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_min) - sg_cpu->sg_policy->limits_changed = true; + sg_cpu->sg_policy->need_freq_update = true; } static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, @@ -871,7 +897,16 @@ static void sugov_limits(struct cpufreq_policy *policy) mutex_unlock(&sg_policy->work_lock); } - sg_policy->limits_changed = true; + /* + * The limits_changed update below must take place before the updates + * of policy limits in cpufreq_set_policy() or a policy limits update + * might be missed, so use a memory barrier to ensure it. + * + * This pairs with the memory barrier in sugov_should_update_freq(). + */ + smp_wmb(); + + WRITE_ONCE(sg_policy->limits_changed, true); } struct cpufreq_governor schedutil_gov = { diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 66bcd40a28ca1d..f5133249fd4d92 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -163,7 +163,7 @@ enum scx_ops_flags { /* * CPU cgroup support flags */ - SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */ + SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* DEPRECATED, will be removed on 6.18 */ SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | @@ -1118,8 +1118,38 @@ static void scx_kf_disallow(u32 mask) current->scx.kf_mask &= ~mask; } -#define SCX_CALL_OP(mask, op, args...) \ +/* + * Track the rq currently locked. + * + * This allows kfuncs to safely operate on rq from any scx ops callback, + * knowing which rq is already locked. 
+ */ +static DEFINE_PER_CPU(struct rq *, locked_rq); + +static inline void update_locked_rq(struct rq *rq) +{ + /* + * Check whether @rq is actually locked. This can help expose bugs + * or incorrect assumptions about the context in which a kfunc or + * callback is executed. + */ + if (rq) + lockdep_assert_rq_held(rq); + __this_cpu_write(locked_rq, rq); +} + +/* + * Return the rq currently locked from an scx callback, or NULL if no rq is + * locked. + */ +static inline struct rq *scx_locked_rq(void) +{ + return __this_cpu_read(locked_rq); +} + +#define SCX_CALL_OP(mask, op, rq, args...) \ do { \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ scx_ops.op(args); \ @@ -1127,11 +1157,14 @@ do { \ } else { \ scx_ops.op(args); \ } \ + update_locked_rq(NULL); \ } while (0) -#define SCX_CALL_OP_RET(mask, op, args...) \ +#define SCX_CALL_OP_RET(mask, op, rq, args...) \ ({ \ __typeof__(scx_ops.op(args)) __ret; \ + \ + update_locked_rq(rq); \ if (mask) { \ scx_kf_allow(mask); \ __ret = scx_ops.op(args); \ @@ -1139,6 +1172,7 @@ do { \ } else { \ __ret = scx_ops.op(args); \ } \ + update_locked_rq(NULL); \ __ret; \ }) @@ -1153,31 +1187,31 @@ do { \ * scx_kf_allowed_on_arg_tasks() to test whether the invocation is allowed on * the specific task. */ -#define SCX_CALL_OP_TASK(mask, op, task, args...) \ +#define SCX_CALL_OP_TASK(mask, op, rq, task, args...) \ do { \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ - SCX_CALL_OP(mask, op, task, ##args); \ + SCX_CALL_OP(mask, op, rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ } while (0) -#define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \ +#define SCX_CALL_OP_TASK_RET(mask, op, rq, task, args...) 
\ ({ \ __typeof__(scx_ops.op(task, ##args)) __ret; \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ - __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \ + __ret = SCX_CALL_OP_RET(mask, op, rq, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ __ret; \ }) -#define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \ +#define SCX_CALL_OP_2TASKS_RET(mask, op, rq, task0, task1, args...) \ ({ \ __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \ BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task0; \ current->scx.kf_tasks[1] = task1; \ - __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \ + __ret = SCX_CALL_OP_RET(mask, op, rq, task0, task1, ##args); \ current->scx.kf_tasks[0] = NULL; \ current->scx.kf_tasks[1] = NULL; \ __ret; \ @@ -2172,7 +2206,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, WARN_ON_ONCE(*ddsp_taskp); *ddsp_taskp = p; - SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags); + SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, rq, p, enq_flags); *ddsp_taskp = NULL; if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID) @@ -2269,7 +2303,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags add_nr_running(rq, 1); if (SCX_HAS_OP(runnable) && !task_on_rq_migrating(p)) - SCX_CALL_OP_TASK(SCX_KF_REST, runnable, p, enq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, runnable, rq, p, enq_flags); if (enq_flags & SCX_ENQ_WAKEUP) touch_core_sched(rq, p); @@ -2283,7 +2317,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags __scx_add_event(SCX_EV_SELECT_CPU_FALLBACK, 1); } -static void ops_dequeue(struct task_struct *p, u64 deq_flags) +static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags) { unsigned long opss; @@ -2304,7 +2338,7 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags) BUG(); case SCX_OPSS_QUEUED: if (SCX_HAS_OP(dequeue)) - SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, p, 
deq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, dequeue, rq, p, deq_flags); if (atomic_long_try_cmpxchg(&p->scx.ops_state, &opss, SCX_OPSS_NONE)) @@ -2337,7 +2371,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags return true; } - ops_dequeue(p, deq_flags); + ops_dequeue(rq, p, deq_flags); /* * A currently running task which is going off @rq first gets dequeued @@ -2353,11 +2387,11 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags */ if (SCX_HAS_OP(stopping) && task_current(rq, p)) { update_curr_scx(rq); - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, false); + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, false); } if (SCX_HAS_OP(quiescent) && !task_on_rq_migrating(p)) - SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, p, deq_flags); + SCX_CALL_OP_TASK(SCX_KF_REST, quiescent, rq, p, deq_flags); if (deq_flags & SCX_DEQ_SLEEP) p->scx.flags |= SCX_TASK_DEQD_FOR_SLEEP; @@ -2377,7 +2411,7 @@ static void yield_task_scx(struct rq *rq) struct task_struct *p = rq->curr; if (SCX_HAS_OP(yield)) - SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, p, NULL); + SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, p, NULL); else p->scx.slice = 0; } @@ -2387,7 +2421,7 @@ static bool yield_to_task_scx(struct rq *rq, struct task_struct *to) struct task_struct *from = rq->curr; if (SCX_HAS_OP(yield)) - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, from, to); + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, yield, rq, from, to); else return false; } @@ -2945,7 +2979,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) * emitted in switch_class(). 
*/ if (SCX_HAS_OP(cpu_acquire)) - SCX_CALL_OP(SCX_KF_REST, cpu_acquire, cpu_of(rq), NULL); + SCX_CALL_OP(SCX_KF_REST, cpu_acquire, rq, cpu_of(rq), NULL); rq->scx.cpu_released = false; } @@ -2990,7 +3024,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev) do { dspc->nr_tasks = 0; - SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, cpu_of(rq), + SCX_CALL_OP(SCX_KF_DISPATCH, dispatch, rq, cpu_of(rq), prev_on_scx ? prev : NULL); flush_dispatch_buf(rq); @@ -3104,7 +3138,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first) * Core-sched might decide to execute @p before it is * dispatched. Call ops_dequeue() to notify the BPF scheduler. */ - ops_dequeue(p, SCX_DEQ_CORE_SCHED_EXEC); + ops_dequeue(rq, p, SCX_DEQ_CORE_SCHED_EXEC); dispatch_dequeue(rq, p); } @@ -3112,7 +3146,7 @@ static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first) /* see dequeue_task_scx() on why we skip when !QUEUED */ if (SCX_HAS_OP(running) && (p->scx.flags & SCX_TASK_QUEUED)) - SCX_CALL_OP_TASK(SCX_KF_REST, running, p); + SCX_CALL_OP_TASK(SCX_KF_REST, running, rq, p); clr_task_runnable(p, true); @@ -3193,8 +3227,7 @@ static void switch_class(struct rq *rq, struct task_struct *next) .task = next, }; - SCX_CALL_OP(SCX_KF_CPU_RELEASE, - cpu_release, cpu_of(rq), &args); + SCX_CALL_OP(SCX_KF_CPU_RELEASE, cpu_release, rq, cpu_of(rq), &args); } rq->scx.cpu_released = true; } @@ -3207,7 +3240,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p, /* see dequeue_task_scx() on why we skip when !QUEUED */ if (SCX_HAS_OP(stopping) && (p->scx.flags & SCX_TASK_QUEUED)) - SCX_CALL_OP_TASK(SCX_KF_REST, stopping, p, true); + SCX_CALL_OP_TASK(SCX_KF_REST, stopping, rq, p, true); if (p->scx.flags & SCX_TASK_QUEUED) { set_task_runnable(rq, p); @@ -3348,7 +3381,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, * verifier. 
*/ if (SCX_HAS_OP(core_sched_before) && !scx_rq_bypassing(task_rq(a))) - return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, + return SCX_CALL_OP_2TASKS_RET(SCX_KF_REST, core_sched_before, NULL, (struct task_struct *)a, (struct task_struct *)b); else @@ -3385,7 +3418,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag *ddsp_taskp = p; cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU, - select_cpu, p, prev_cpu, wake_flags); + select_cpu, NULL, p, prev_cpu, wake_flags); p->scx.selected_cpu = cpu; *ddsp_taskp = NULL; if (ops_cpu_valid(cpu, "from ops.select_cpu()")) @@ -3430,8 +3463,8 @@ static void set_cpus_allowed_scx(struct task_struct *p, * designation pointless. Cast it away when calling the operation. */ if (SCX_HAS_OP(set_cpumask)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, - (struct cpumask *)p->cpus_ptr); + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, NULL, + p, (struct cpumask *)p->cpus_ptr); } static void handle_hotplug(struct rq *rq, bool online) @@ -3444,9 +3477,9 @@ static void handle_hotplug(struct rq *rq, bool online) scx_idle_update_selcpu_topology(&scx_ops); if (online && SCX_HAS_OP(cpu_online)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_online, NULL, cpu); else if (!online && SCX_HAS_OP(cpu_offline)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, cpu); + SCX_CALL_OP(SCX_KF_UNLOCKED, cpu_offline, NULL, cpu); else scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG, "cpu %d going %s, exiting scheduler", cpu, @@ -3550,7 +3583,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued) curr->scx.slice = 0; touch_core_sched(rq, curr); } else if (SCX_HAS_OP(tick)) { - SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); + SCX_CALL_OP_TASK(SCX_KF_REST, tick, rq, curr); } if (!curr->scx.slice) @@ -3627,7 +3660,7 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool .fork = fork, }; - ret = 
SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, p, &args); + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init_task, NULL, p, &args); if (unlikely(ret)) { ret = ops_sanitize_err("init_task", ret); return ret; @@ -3668,9 +3701,10 @@ static int scx_ops_init_task(struct task_struct *p, struct task_group *tg, bool static void scx_ops_enable_task(struct task_struct *p) { + struct rq *rq = task_rq(p); u32 weight; - lockdep_assert_rq_held(task_rq(p)); + lockdep_assert_rq_held(rq); /* * Set the weight before calling ops.enable() so that the scheduler @@ -3684,20 +3718,22 @@ static void scx_ops_enable_task(struct task_struct *p) p->scx.weight = sched_weight_to_cgroup(weight); if (SCX_HAS_OP(enable)) - SCX_CALL_OP_TASK(SCX_KF_REST, enable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, enable, rq, p); scx_set_task_state(p, SCX_TASK_ENABLED); if (SCX_HAS_OP(set_weight)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); } static void scx_ops_disable_task(struct task_struct *p) { - lockdep_assert_rq_held(task_rq(p)); + struct rq *rq = task_rq(p); + + lockdep_assert_rq_held(rq); WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); if (SCX_HAS_OP(disable)) - SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, disable, rq, p); scx_set_task_state(p, SCX_TASK_READY); } @@ -3726,7 +3762,7 @@ static void scx_ops_exit_task(struct task_struct *p) } if (SCX_HAS_OP(exit_task)) - SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, task_rq(p), p, &args); scx_set_task_state(p, SCX_TASK_NONE); } @@ -3835,7 +3871,7 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p, p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight)); if (SCX_HAS_OP(set_weight)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, p, p->scx.weight); + SCX_CALL_OP_TASK(SCX_KF_REST, set_weight, rq, p, p->scx.weight); } static void prio_changed_scx(struct rq *rq, struct 
task_struct *p, int oldprio) @@ -3851,8 +3887,8 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p) * different scheduler class. Keep the BPF scheduler up-to-date. */ if (SCX_HAS_OP(set_cpumask)) - SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, p, - (struct cpumask *)p->cpus_ptr); + SCX_CALL_OP_TASK(SCX_KF_REST, set_cpumask, rq, + p, (struct cpumask *)p->cpus_ptr); } static void switched_from_scx(struct rq *rq, struct task_struct *p) @@ -3899,35 +3935,6 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; -static bool cgroup_warned_missing_weight; -static bool cgroup_warned_missing_idle; - -static void scx_cgroup_warn_missing_weight(struct task_group *tg) -{ - if (scx_ops_enable_state() == SCX_OPS_DISABLED || - cgroup_warned_missing_weight) - return; - - if ((scx_ops.flags & SCX_OPS_HAS_CGROUP_WEIGHT) || !tg->css.parent) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.weight\n", - scx_ops.name); - cgroup_warned_missing_weight = true; -} - -static void scx_cgroup_warn_missing_idle(struct task_group *tg) -{ - if (!scx_cgroup_enabled || cgroup_warned_missing_idle) - return; - - if (!tg->idle) - return; - - pr_warn("sched_ext: \"%s\" does not implement cgroup cpu.idle\n", - scx_ops.name); - cgroup_warned_missing_idle = true; -} int scx_tg_online(struct task_group *tg) { @@ -3937,14 +3944,12 @@ int scx_tg_online(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_weight(tg); - if (scx_cgroup_enabled) { if (SCX_HAS_OP(cgroup_init)) { struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, tg->css.cgroup, &args); if (ret) ret = ops_sanitize_err("cgroup_init", ret); @@ -3966,7 +3971,7 @@ void scx_tg_offline(struct task_group *tg) percpu_down_read(&scx_cgroup_rwsem); if (SCX_HAS_OP(cgroup_exit) && (tg->scx_flags & 
SCX_TG_INITED)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, tg->css.cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, tg->css.cgroup); tg->scx_flags &= ~(SCX_TG_ONLINE | SCX_TG_INITED); percpu_up_read(&scx_cgroup_rwsem); @@ -3999,7 +4004,7 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) continue; if (SCX_HAS_OP(cgroup_prep_move)) { - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_prep_move, NULL, p, from, css->cgroup); if (ret) goto err; @@ -4013,8 +4018,8 @@ int scx_cgroup_can_attach(struct cgroup_taskset *tset) err: cgroup_taskset_for_each(p, css, tset) { if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, - p->scx.cgrp_moving_from, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, + p, p->scx.cgrp_moving_from, css->cgroup); p->scx.cgrp_moving_from = NULL; } @@ -4032,8 +4037,8 @@ void scx_cgroup_move_task(struct task_struct *p) * cgrp_moving_from set. 
*/ if (SCX_HAS_OP(cgroup_move) && !WARN_ON_ONCE(!p->scx.cgrp_moving_from)) - SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, p, - p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); + SCX_CALL_OP_TASK(SCX_KF_UNLOCKED, cgroup_move, NULL, + p, p->scx.cgrp_moving_from, tg_cgrp(task_group(p))); p->scx.cgrp_moving_from = NULL; } @@ -4052,8 +4057,8 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_for_each(p, css, tset) { if (SCX_HAS_OP(cgroup_cancel_move) && p->scx.cgrp_moving_from) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, p, - p->scx.cgrp_moving_from, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_cancel_move, NULL, + p, p->scx.cgrp_moving_from, css->cgroup); p->scx.cgrp_moving_from = NULL; } out_unlock: @@ -4066,7 +4071,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) if (scx_cgroup_enabled && tg->scx_weight != weight) { if (SCX_HAS_OP(cgroup_set_weight)) - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_set_weight, NULL, tg_cgrp(tg), weight); tg->scx_weight = weight; } @@ -4076,9 +4081,7 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) void scx_group_set_idle(struct task_group *tg, bool idle) { - percpu_down_read(&scx_cgroup_rwsem); - scx_cgroup_warn_missing_idle(tg); - percpu_up_read(&scx_cgroup_rwsem); + /* TODO: Implement ops->cgroup_set_idle() */ } static void scx_cgroup_lock(void) @@ -4257,7 +4260,7 @@ static void scx_cgroup_exit(void) continue; rcu_read_unlock(); - SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, css->cgroup); + SCX_CALL_OP(SCX_KF_UNLOCKED, cgroup_exit, NULL, css->cgroup); rcu_read_lock(); css_put(css); @@ -4272,9 +4275,6 @@ static int scx_cgroup_init(void) percpu_rwsem_assert_held(&scx_cgroup_rwsem); - cgroup_warned_missing_weight = false; - cgroup_warned_missing_idle = false; - /* * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. 
@@ -4284,9 +4284,6 @@ static int scx_cgroup_init(void) struct task_group *tg = css_tg(css); struct scx_cgroup_init_args args = { .weight = tg->scx_weight }; - scx_cgroup_warn_missing_weight(tg); - scx_cgroup_warn_missing_idle(tg); - if ((tg->scx_flags & (SCX_TG_ONLINE | SCX_TG_INITED)) != SCX_TG_ONLINE) continue; @@ -4300,7 +4297,7 @@ static int scx_cgroup_init(void) continue; rcu_read_unlock(); - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, NULL, css->cgroup, &args); if (ret) { css_put(css); @@ -4623,7 +4620,7 @@ static void scx_ops_bypass(bool bypass) static void free_exit_info(struct scx_exit_info *ei) { - kfree(ei->dump); + kvfree(ei->dump); kfree(ei->msg); kfree(ei->bt); kfree(ei); @@ -4639,7 +4636,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len) ei->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(ei->bt[0]), GFP_KERNEL); ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL); - ei->dump = kzalloc(exit_dump_len, GFP_KERNEL); + ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL); if (!ei->bt || !ei->msg || !ei->dump) { free_exit_info(ei); @@ -4797,7 +4794,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) } if (scx_ops.exit) - SCX_CALL_OP(SCX_KF_UNLOCKED, exit, ei); + SCX_CALL_OP(SCX_KF_UNLOCKED, exit, NULL, ei); cancel_delayed_work_sync(&scx_watchdog_work); @@ -5004,7 +5001,7 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, if (SCX_HAS_OP(dump_task)) { ops_dump_init(s, " "); - SCX_CALL_OP(SCX_KF_REST, dump_task, dctx, p); + SCX_CALL_OP(SCX_KF_REST, dump_task, NULL, dctx, p); ops_dump_exit(); } @@ -5051,7 +5048,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) if (SCX_HAS_OP(dump)) { ops_dump_init(&s, ""); - SCX_CALL_OP(SCX_KF_UNLOCKED, dump, &dctx); + SCX_CALL_OP(SCX_KF_UNLOCKED, dump, NULL, &dctx); ops_dump_exit(); } @@ -5108,7 +5105,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) used = seq_buf_used(&ns); 
if (SCX_HAS_OP(dump_cpu)) { ops_dump_init(&ns, " "); - SCX_CALL_OP(SCX_KF_REST, dump_cpu, &dctx, cpu, idle); + SCX_CALL_OP(SCX_KF_REST, dump_cpu, NULL, &dctx, cpu, idle); ops_dump_exit(); } @@ -5252,6 +5249,9 @@ static int validate_ops(const struct sched_ext_ops *ops) return -EINVAL; } + if (ops->flags & SCX_OPS_HAS_CGROUP_WEIGHT) + pr_warn("SCX_OPS_HAS_CGROUP_WEIGHT is deprecated and a noop\n"); + return 0; } @@ -5364,7 +5364,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) scx_idle_enable(ops); if (scx_ops.init) { - ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init); + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init, NULL); if (ret) { ret = ops_sanitize_err("init", ret); cpus_read_unlock(); @@ -6827,6 +6827,12 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, BUILD_BUG_ON(__alignof__(struct bpf_iter_scx_dsq_kern) != __alignof__(struct bpf_iter_scx_dsq)); + /* + * next() and destroy() will be called regardless of the return value. + * Always clear $kit->dsq. + */ + kit->dsq = NULL; + if (flags & ~__SCX_DSQ_ITER_USER_FLAGS) return -EINVAL; @@ -7113,13 +7119,32 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf) } if (ops_cpu_valid(cpu, NULL)) { - struct rq *rq = cpu_rq(cpu); + struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq(); + struct rq_flags rf; + + /* + * When called with an rq lock held, restrict the operation + * to the corresponding CPU to prevent ABBA deadlocks. + */ + if (locked_rq && rq != locked_rq) { + scx_ops_error("Invalid target CPU %d", cpu); + return; + } + + /* + * If no rq lock is held, allow to operate on any CPU by + * acquiring the corresponding rq lock. 
+ */ + if (!locked_rq) { + rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); + } rq->scx.cpuperf_target = perf; + cpufreq_update_util(rq, 0); - rcu_read_lock_sched_notrace(); - cpufreq_update_util(cpu_rq(cpu), 0); - rcu_read_unlock_sched_notrace(); + if (!locked_rq) + rq_unlock_irqrestore(rq, &rf); } } @@ -7350,12 +7375,6 @@ BTF_ID_FLAGS(func, scx_bpf_nr_cpu_ids) BTF_ID_FLAGS(func, scx_bpf_get_possible_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_get_online_cpumask, KF_ACQUIRE) BTF_ID_FLAGS(func, scx_bpf_put_cpumask, KF_RELEASE) -BTF_ID_FLAGS(func, scx_bpf_get_idle_cpumask, KF_ACQUIRE) -BTF_ID_FLAGS(func, scx_bpf_get_idle_smtmask, KF_ACQUIRE) -BTF_ID_FLAGS(func, scx_bpf_put_idle_cpumask, KF_RELEASE) -BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) -BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) -BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_running, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_task_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_cpu_rq) diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c index cb343ca889e025..50b9f3af810d93 100644 --- a/kernel/sched/ext_idle.c +++ b/kernel/sched/ext_idle.c @@ -131,6 +131,7 @@ static s32 pick_idle_cpu_in_node(const struct cpumask *cpus_allowed, int node, u goto retry; } +#ifdef CONFIG_NUMA /* * Tracks nodes that have not yet been visited when searching for an idle * CPU across all available nodes. @@ -179,6 +180,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i return cpu; } +#else +static inline s32 +pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags) +{ + return -EBUSY; +} +#endif /* * Find an idle CPU in the system, starting from @node. @@ -674,7 +682,7 @@ void __scx_update_idle(struct rq *rq, bool idle, bool do_notify) * managed by put_prev_task_idle()/set_next_task_idle(). 
*/ if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq)) - SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle); + SCX_CALL_OP(SCX_KF_REST, update_idle, rq, cpu_of(rq), idle); /* * Update the idle masks: diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e43993a4e5807e..1e8c138c797f22 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -76,10 +76,19 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; * * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) */ +#ifdef CONFIG_CACHY +unsigned int sysctl_sched_base_slice = 350000ULL; +static unsigned int normalized_sysctl_sched_base_slice = 350000ULL; +#else unsigned int sysctl_sched_base_slice = 700000ULL; static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; +#endif /* CONFIG_CACHY */ +#ifdef CONFIG_CACHY +__read_mostly unsigned int sysctl_sched_migration_cost = 300000UL; +#else __read_mostly unsigned int sysctl_sched_migration_cost = 500000UL; +#endif static int __init setup_sched_thermal_decay_shift(char *str) { @@ -124,8 +133,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ +#ifdef CONFIG_CACHY +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; +#else static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +#endif #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ @@ -7081,9 +7094,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) h_nr_idle = task_has_idle_policy(p); if (task_sleep || task_delayed || !se->sched_delayed) h_nr_runnable = 1; - } else { - cfs_rq = group_cfs_rq(se); - slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7093,6 +7103,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (p && &p->se == se) return -1; + slice = cfs_rq_min_slice(cfs_rq); break; } @@ -7198,6 +7209,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) return true; } +static inline unsigned int cfs_h_nr_delayed(struct rq *rq) +{ + return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable); +} + #ifdef CONFIG_SMP /* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */ @@ -7359,8 +7375,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync) if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu)) return available_idle_cpu(prev_cpu) ? 
prev_cpu : this_cpu; - if (sync && cpu_rq(this_cpu)->nr_running == 1) - return this_cpu; + if (sync) { + struct rq *rq = cpu_rq(this_cpu); + + if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1) + return this_cpu; + } if (available_idle_cpu(prev_cpu)) return prev_cpu; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 47972f34ea7014..0892942cf913d4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2790,7 +2790,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_CACHY) # define SCHED_NR_MIGRATE_BREAK 8 #else # define SCHED_NR_MIGRATE_BREAK 32 diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 51e38f5f47018c..c5cc616484badd 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ } EXPORT_SYMBOL_GPL(add_wait_queue_priority); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -258,6 +269,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + 
set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3b7a7308e35b09..fe7bec454345fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -70,6 +70,9 @@ #ifdef CONFIG_RT_MUTEXES #include #endif +#ifdef CONFIG_USER_NS +#include +#endif /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; @@ -1595,6 +1598,15 @@ static const struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 517ee2590a29e5..30899a8cc52c0a 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -366,7 +366,7 @@ static const struct debug_obj_descr hrtimer_debug_descr; static void *hrtimer_debug_hint(void *addr) { - return ((struct hrtimer *) addr)->function; + return ACCESS_PRIVATE((struct hrtimer *)addr, function); } /* diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 50e8d04ab661f4..2e5b89d7d86605 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1405,6 +1405,15 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); + /* + * Ensure that release_task(tsk) can't happen while + * handle_posix_cpu_timers() is running. Otherwise, a concurrent + * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and + * miss timer->it.cpu.firing != 0. 
+ */ + if (tsk->exit_state) + return; + /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a47bcf71defcf5..9a3859443c042c 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -509,6 +509,7 @@ void tick_resume(void) #ifdef CONFIG_SUSPEND static DEFINE_RAW_SPINLOCK(tick_freeze_lock); +static DEFINE_WAIT_OVERRIDE_MAP(tick_freeze_map, LD_WAIT_SLEEP); static unsigned int tick_freeze_depth; /** @@ -528,9 +529,22 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); + /* + * All other CPUs have their interrupts disabled and are + * suspended to idle. Other tasks have been frozen so there + * is no scheduling happening. This means that there is no + * concurrency in the system at this point. Therefore it is + * okay to acquire a sleeping lock on PREEMPT_RT, such as a + * spinlock, because the lock cannot be held by other CPUs + * or threads and acquiring it cannot block. + * + * Inform lockdep about the situation. + */ + lock_map_acquire_try(&tick_freeze_map); system_state = SYSTEM_SUSPEND; sched_clock_suspend(); timekeeping_suspend(); + lock_map_release(&tick_freeze_map); } else { tick_suspend_local(); } @@ -552,8 +566,16 @@ void tick_unfreeze(void) raw_spin_lock(&tick_freeze_lock); if (tick_freeze_depth == num_online_cpus()) { + /* + * Similar to tick_freeze(). On resumption the first CPU may + * acquire uncontended sleeping locks while other CPUs block on + * tick_freeze_lock. 
+ */ + lock_map_acquire_try(&tick_freeze_map); timekeeping_resume(); sched_clock_resume(); + lock_map_release(&tick_freeze_map); + system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1e67d076f1955a..a009c91f7b05fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -164,10 +164,34 @@ static inline struct timespec64 tk_xtime(const struct timekeeper *tk) return ts; } +static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk) +{ + struct timespec64 ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = tk->coarse_nsec; + return ts; +} + +/* + * Update the nanoseconds part for the coarse time keepers. They can't rely + * on xtime_nsec because xtime_nsec could be adjusted by a small negative + * amount when the multiplication factor of the clock is adjusted, which + * could cause the coarse clocks to go slightly backwards. See + * timekeeping_apply_adjustment(). Thus we keep a separate copy for the coarse + * clockids which only is updated when the clock has been set or we have + * accumulated time. 
+ */ +static inline void tk_update_coarse_nsecs(struct timekeeper *tk) +{ + tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; +} + static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; + tk_update_coarse_nsecs(tk); } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) @@ -175,6 +199,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) tk->xtime_sec += ts->tv_sec; tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); + tk_update_coarse_nsecs(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) @@ -708,6 +733,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); delta -= incr; } + tk_update_coarse_nsecs(tk); } /** @@ -804,8 +830,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset); ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; ktime_t base, *offset = offsets[offs]; + unsigned int seq; u64 nsecs; WARN_ON(timekeeping_suspended); @@ -813,7 +839,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); base = ktime_add(tk->tkr_mono.base, *offset); - nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsecs = tk->coarse_nsec; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2161,7 +2187,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) struct timekeeper *real_tk = &tk_core.timekeeper; unsigned int clock_set = 0; int shift = 0, maxshift; - u64 offset; + u64 offset, orig_offset; guard(raw_spinlock_irqsave)(&tk_core.lock); @@ -2172,7 +2198,7 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask, 
tk->tkr_mono.clock->max_raw_delta); - + orig_offset = offset; /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; @@ -2205,6 +2231,14 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) */ clock_set |= accumulate_nsecs_to_secs(tk); + /* + * To avoid inconsistencies caused adjtimex TK_ADV_FREQ calls + * making small negative adjustments to the base xtime_nsec + * value, only update the coarse clocks if we accumulated time + */ + if (orig_offset != offset) + tk_update_coarse_nsecs(tk); + timekeeping_update_from_shadow(&tk_core, clock_set); return !!clock_set; @@ -2248,7 +2282,7 @@ void ktime_get_coarse_real_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); } EXPORT_SYMBOL(ktime_get_coarse_real_ts64); @@ -2271,7 +2305,7 @@ void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - *ts = tk_xtime(tk); + *ts = tk_xtime_coarse(tk); offset = tk_core.timekeeper.offs_real; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -2350,12 +2384,12 @@ void ktime_get_coarse_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - now = tk_xtime(tk); + now = tk_xtime_coarse(tk); mono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec, - now.tv_nsec + mono.tv_nsec); + now.tv_nsec + mono.tv_nsec); } EXPORT_SYMBOL(ktime_get_coarse_ts64); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 01c2ab1e897193..32ef27c71b57aa 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *tk) /* CLOCK_REALTIME_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; - vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> 
tk->tkr_mono.shift; + vdso_ts->nsec = tk->coarse_nsec; /* CLOCK_MONOTONIC_COARSE */ vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec = tk->coarse_nsec; nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 187dc37d61d4a3..090cdab38f0ccd 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1858,7 +1858,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void) struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); - if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + if (nest_level > ARRAY_SIZE(tp_regs->regs)) { this_cpu_dec(bpf_raw_tp_nest_level); return ERR_PTR(-EBUSY); } @@ -2987,6 +2987,9 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_kprobe_multi(prog)) return -EINVAL; @@ -3376,6 +3379,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; + if (attr->link_create.flags) + return -EINVAL; + if (!is_uprobe_multi(prog)) return -EINVAL; @@ -3417,7 +3423,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr } if (pid) { + rcu_read_lock(); task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); + rcu_read_unlock(); if (!task) { err = -ESRCH; goto error_path_put; diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 33082c4e8154ea..ba7ff14f5339b5 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -89,8 +89,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node) { lockdep_assert_held(&fprobe_mutex); - 
WRITE_ONCE(node->fp, NULL); - hlist_del_rcu(&node->hlist); + /* Avoid double deleting */ + if (READ_ONCE(node->fp) != NULL) { + WRITE_ONCE(node->fp, NULL); + hlist_del_rcu(&node->hlist); + } return !!find_first_fprobe_node(node->addr); } @@ -411,6 +414,103 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } +#ifdef CONFIG_MODULES + +#define FPROBE_IPS_BATCH_INIT 8 +/* instruction pointer address list */ +struct fprobe_addr_list { + int index; + int size; + unsigned long *addrs; +}; + +static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long addr) +{ + unsigned long *addrs; + + if (alist->index >= alist->size) + return -ENOMEM; + + alist->addrs[alist->index++] = addr; + if (alist->index < alist->size) + return 0; + + /* Expand the address list */ + addrs = kcalloc(alist->size * 2, sizeof(*addrs), GFP_KERNEL); + if (!addrs) + return -ENOMEM; + + memcpy(addrs, alist->addrs, alist->size * sizeof(*addrs)); + alist->size *= 2; + kfree(alist->addrs); + alist->addrs = addrs; + + return 0; +} + +static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head, + struct fprobe_addr_list *alist) +{ + struct fprobe_hlist_node *node; + int ret = 0; + + hlist_for_each_entry_rcu(node, head, hlist, + lockdep_is_held(&fprobe_mutex)) { + if (!within_module(node->addr, mod)) + continue; + if (delete_fprobe_node(node)) + continue; + /* + * If failed to update alist, just continue to update hlist. + * Therefore, at list user handler will not hit anymore. + */ + if (!ret) + ret = fprobe_addr_list_add(alist, node->addr); + } +} + +/* Handle module unloading to manage fprobe_ip_table. 
*/ +static int fprobe_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT}; + struct module *mod = data; + int i; + + if (val != MODULE_STATE_GOING) + return NOTIFY_DONE; + + alist.addrs = kcalloc(alist.size, sizeof(*alist.addrs), GFP_KERNEL); + /* If failed to alloc memory, we can not remove ips from hash. */ + if (!alist.addrs) + return NOTIFY_DONE; + + mutex_lock(&fprobe_mutex); + for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) + fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); + + if (alist.index < alist.size && alist.index > 0) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, + alist.addrs, alist.index, 1, 0); + mutex_unlock(&fprobe_mutex); + + kfree(alist.addrs); + + return NOTIFY_DONE; +} + +static struct notifier_block fprobe_module_nb = { + .notifier_call = fprobe_module_callback, + .priority = 0, +}; + +static int __init init_fprobe_module(void) +{ + return register_module_notifier(&fprobe_module_nb); +} +early_initcall(init_fprobe_module); +#endif + static int symbols_cmp(const void *a, const void *b) { const char **str_a = (const char **) a; @@ -445,6 +545,7 @@ struct filter_match_data { size_t index; size_t size; unsigned long *addrs; + struct module **mods; }; static int filter_match_callback(void *data, const char *name, unsigned long addr) @@ -458,30 +559,47 @@ static int filter_match_callback(void *data, const char *name, unsigned long add if (!ftrace_location(addr)) return 0; - if (match->addrs) - match->addrs[match->index] = addr; + if (match->addrs) { + struct module *mod = __module_text_address(addr); + if (mod && !try_module_get(mod)) + return 0; + + match->mods[match->index] = mod; + match->addrs[match->index] = addr; + } match->index++; return match->index == match->size; } /* * Make IP list from the filter/no-filter glob patterns. - * Return the number of matched symbols, or -ENOENT. + * Return the number of matched symbols, or errno. 
+ * If @addrs == NULL, this just counts the number of matched symbols. If @addrs + * is passed with an array, we need to pass the an @mods array of the same size + * to increment the module refcount for each symbol. + * This means we also need to call `module_put` for each element of @mods after + * using the @addrs. */ -static int ip_list_from_filter(const char *filter, const char *notfilter, - unsigned long *addrs, size_t size) +static int get_ips_from_filter(const char *filter, const char *notfilter, + unsigned long *addrs, struct module **mods, + size_t size) { struct filter_match_data match = { .filter = filter, .notfilter = notfilter, - .index = 0, .size = size, .addrs = addrs}; + .index = 0, .size = size, .addrs = addrs, .mods = mods}; int ret; + if (addrs && !mods) + return -EINVAL; + ret = kallsyms_on_each_symbol(filter_match_callback, &match); if (ret < 0) return ret; - ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); - if (ret < 0) - return ret; + if (IS_ENABLED(CONFIG_MODULES)) { + ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match); + if (ret < 0) + return ret; + } return match.index ?: -ENOENT; } @@ -543,24 +661,35 @@ static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num) */ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) { - unsigned long *addrs; - int ret; + unsigned long *addrs __free(kfree) = NULL; + struct module **mods __free(kfree) = NULL; + int ret, num; if (!fp || !filter) return -EINVAL; - ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX); - if (ret < 0) - return ret; + num = get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX); + if (num < 0) + return num; - addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL); + addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; - ret = ip_list_from_filter(filter, notfilter, addrs, ret); - if (ret > 0) - ret = register_fprobe_ips(fp, addrs, ret); - 
kfree(addrs); + mods = kcalloc(num, sizeof(*mods), GFP_KERNEL); + if (!mods) + return -ENOMEM; + + ret = get_ips_from_filter(filter, notfilter, addrs, mods, num); + if (ret < 0) + return ret; + + ret = register_fprobe_ips(fp, addrs, ret); + + for (int i = 0; i < num; i++) { + if (mods[i]) + module_put(mods[i]); + } return ret; } EXPORT_SYMBOL_GPL(register_fprobe); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1a48aedb52552a..6981830c312859 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1297,6 +1297,8 @@ void ftrace_free_filter(struct ftrace_ops *ops) return; free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; } EXPORT_SYMBOL_GPL(ftrace_free_filter); @@ -3255,6 +3257,31 @@ static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, return 0; } +/* + * Remove functions from @hash that are in @notrace_hash + */ +static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash) +{ + struct ftrace_func_entry *entry; + struct hlist_node *tmp; + int size; + int i; + + /* If the notrace hash is empty, there's nothing to do */ + if (ftrace_hash_empty(notrace_hash)) + return; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { + if (!__ftrace_lookup_ip(notrace_hash, entry->ip)) + continue; + remove_hash_entry(hash, entry); + kfree(entry); + } + } +} + /* * Add to @hash only those that are in both @new_hash1 and @new_hash2 * @@ -3295,67 +3322,6 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has return 0; } -/* Return a new hash that has a union of all @ops->filter_hash entries */ -static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - if 
(ops->func_hash->filter_hash) - size_bits = ops->func_hash->filter_hash->size_bits; - else - size_bits = FTRACE_HASH_DEFAULT_BITS; - - list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - /* Can't return NULL as that means this failed */ - return new_hash ? : EMPTY_HASH; -} - -/* Make @ops trace evenything except what all its subops do not trace */ -static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - list_for_each_entry(subops, &ops->subop_list, list) { - struct ftrace_hash *next_hash; - - if (!new_hash) { - size_bits = subops->func_hash->notrace_hash->size_bits; - new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash); - if (!new_hash) - return NULL; - continue; - } - size_bits = new_hash->size_bits; - next_hash = new_hash; - new_hash = alloc_ftrace_hash(size_bits); - ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash); - free_ftrace_hash(next_hash); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - return new_hash; -} - static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) { struct ftrace_func_entry *entry; @@ -3427,6 +3393,95 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_ return 0; } +static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *func_hash) +{ + /* If the filter hash is not empty, simply remove the nohash from it */ + if (!ftrace_hash_empty(func_hash->filter_hash)) { + *filter_hash = copy_hash(func_hash->filter_hash); + if 
(!*filter_hash) + return -ENOMEM; + remove_hash(*filter_hash, func_hash->notrace_hash); + *notrace_hash = EMPTY_HASH; + + } else { + *notrace_hash = copy_hash(func_hash->notrace_hash); + if (!*notrace_hash) + return -ENOMEM; + *filter_hash = EMPTY_HASH; + } + return 0; +} + +static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash) +{ + int size_bits; + int ret; + + /* If the subops trace all functions so must the main ops */ + if (ftrace_hash_empty(ops_hash->filter_hash) || + ftrace_hash_empty(subops_hash->filter_hash)) { + *filter_hash = EMPTY_HASH; + } else { + /* + * The main ops filter hash is not empty, so its + * notrace_hash had better be, as the notrace hash + * is only used for empty main filter hashes. + */ + WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash)); + + size_bits = max(ops_hash->filter_hash->size_bits, + subops_hash->filter_hash->size_bits); + + /* Copy the subops hash */ + *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); + if (!*filter_hash) + return -ENOMEM; + /* Remove any notrace functions from the copy */ + remove_hash(*filter_hash, subops_hash->notrace_hash); + + ret = append_hash(filter_hash, ops_hash->filter_hash, + size_bits); + if (ret < 0) { + free_ftrace_hash(*filter_hash); + *filter_hash = EMPTY_HASH; + return ret; + } + } + + /* + * Only process notrace hashes if the main filter hash is empty + * (tracing all functions), otherwise the filter hash will just + * remove the notrace hash functions, and the notrace hash is + * not needed. + */ + if (ftrace_hash_empty(*filter_hash)) { + /* + * Intersect the notrace functions. 
That is, if two + * subops are not tracing a set of functions, the + * main ops will only not trace the functions that are + * in both subops, but has to trace the functions that + * are only notrace in one of the subops, for the other + * subops to be able to trace them. + */ + size_bits = max(ops_hash->notrace_hash->size_bits, + subops_hash->notrace_hash->size_bits); + *notrace_hash = alloc_ftrace_hash(size_bits); + if (!*notrace_hash) + return -ENOMEM; + + ret = intersect_hash(notrace_hash, ops_hash->notrace_hash, + subops_hash->notrace_hash); + if (ret < 0) { + free_ftrace_hash(*notrace_hash); + *notrace_hash = EMPTY_HASH; + return ret; + } + } + return 0; +} + /** * ftrace_startup_subops - enable tracing for subops of an ops * @ops: Manager ops (used to pick all the functions of its subops) @@ -3439,11 +3494,10 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_ */ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; struct ftrace_hash *save_filter_hash; struct ftrace_hash *save_notrace_hash; - int size_bits; int ret; if (unlikely(ftrace_disabled)) @@ -3467,14 +3521,14 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* For the first subops to ops just enable it normally */ if (list_empty(&ops->subop_list)) { - /* Just use the subops hashes */ - filter_hash = copy_hash(subops->func_hash->filter_hash); - notrace_hash = copy_hash(subops->func_hash->notrace_hash); - if (!filter_hash || !notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return -ENOMEM; - } + + /* The ops was empty, should have empty hashes */ + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash)); + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash)); + + ret = 
add_first_hash(&filter_hash, &notrace_hash, subops->func_hash); + if (ret < 0) + return ret; save_filter_hash = ops->func_hash->filter_hash; save_notrace_hash = ops->func_hash->notrace_hash; @@ -3500,48 +3554,16 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* * Here there's already something attached. Here are the rules: - * o If either filter_hash is empty then the final stays empty - * o Otherwise, the final is a superset of both hashes - * o If either notrace_hash is empty then the final stays empty - * o Otherwise, the final is an intersection between the hashes + * If the new subops and main ops filter hashes are not empty: + * o Make a copy of the subops filter hash + * o Remove all functions in the nohash from it. + * o Add in the main hash filter functions + * o Remove any of these functions from the main notrace hash */ - if (ftrace_hash_empty(ops->func_hash->filter_hash) || - ftrace_hash_empty(subops->func_hash->filter_hash)) { - filter_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->filter_hash->size_bits, - subops->func_hash->filter_hash->size_bits); - filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); - if (!filter_hash) - return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash, - size_bits); - if (ret < 0) { - free_ftrace_hash(filter_hash); - return ret; - } - } - if (ftrace_hash_empty(ops->func_hash->notrace_hash) || - ftrace_hash_empty(subops->func_hash->notrace_hash)) { - notrace_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->filter_hash->size_bits, - subops->func_hash->filter_hash->size_bits); - notrace_hash = alloc_ftrace_hash(size_bits); - if (!notrace_hash) { - free_ftrace_hash(filter_hash); - return -ENOMEM; - } - - ret = intersect_hash(&notrace_hash, ops->func_hash->filter_hash, - subops->func_hash->filter_hash); - if (ret < 0) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return ret; - } - } + ret
= add_next_hash(&filter_hash, &notrace_hash, ops->func_hash, subops->func_hash); + if (ret < 0) + return ret; list_add(&subops->list, &ops->subop_list); @@ -3557,6 +3579,45 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int return ret; } +static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops *ops) +{ + struct ftrace_ops_hash temp_hash; + struct ftrace_ops *subops; + bool first = true; + int ret; + + temp_hash.filter_hash = EMPTY_HASH; + temp_hash.notrace_hash = EMPTY_HASH; + + list_for_each_entry(subops, &ops->subop_list, list) { + *filter_hash = EMPTY_HASH; + *notrace_hash = EMPTY_HASH; + + if (first) { + ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash); + if (ret < 0) + return ret; + first = false; + } else { + ret = add_next_hash(filter_hash, notrace_hash, + &temp_hash, subops->func_hash); + if (ret < 0) { + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + return ret; + } + } + + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + + temp_hash.filter_hash = *filter_hash; + temp_hash.notrace_hash = *notrace_hash; + } + return 0; +} + /** * ftrace_shutdown_subops - Remove a subops from a manager ops * @ops: A manager ops to remove @subops from @@ -3571,8 +3632,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int */ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash = EMPTY_HASH; + struct ftrace_hash *notrace_hash = EMPTY_HASH; int ret; if (unlikely(ftrace_disabled)) @@ -3605,14 +3666,9 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in } /* Rebuild the hashes without subops */ - filter_hash = append_hashes(ops); - notrace_hash = intersect_hashes(ops); - if (!filter_hash ||
!notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - list_add(&subops->list, &ops->subop_list); - return -ENOMEM; - } + ret = rebuild_hashes(&filter_hash, &notrace_hash, ops); + if (ret < 0) + return ret; ret = ftrace_update_ops(ops, filter_hash, notrace_hash); if (ret < 0) { @@ -3628,11 +3684,11 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, struct ftrace_hash **orig_subhash, - struct ftrace_hash *hash, - int enable) + struct ftrace_hash *hash) { struct ftrace_ops *ops = subops->managed; - struct ftrace_hash **orig_hash; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; struct ftrace_hash *save_hash; struct ftrace_hash *new_hash; int ret; @@ -3649,24 +3705,18 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, return -ENOMEM; } - /* Create a new_hash to hold the ops new functions */ - if (enable) { - orig_hash = &ops->func_hash->filter_hash; - new_hash = append_hashes(ops); - } else { - orig_hash = &ops->func_hash->notrace_hash; - new_hash = intersect_hashes(ops); + ret = rebuild_hashes(&filter_hash, &notrace_hash, ops); + if (!ret) { + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); } - /* Move the hash over to the new hash */ - ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); - - free_ftrace_hash(new_hash); - if (ret) { /* Put back the original hash */ - free_ftrace_hash_rcu(*orig_subhash); + new_hash = *orig_subhash; *orig_subhash = save_hash; + free_ftrace_hash_rcu(new_hash); } else { free_ftrace_hash_rcu(save_hash); } @@ -4890,7 +4940,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, int enable) { if (ops->flags & FTRACE_OPS_FL_SUBOP) - return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(ops,
orig_hash, hash); /* * If this ops is not enabled, it could be sharing its filters @@ -4909,7 +4959,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, list_for_each_entry(subops, &op->subop_list, list) { if ((subops->flags & FTRACE_OPS_FL_ENABLED) && subops->func_hash == ops->func_hash) { - return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash); } } } while_for_each_ftrace_op(op); @@ -5914,9 +5964,10 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) /* Make a copy hash to place the new and the old entries in */ size = hash->count + direct_functions->count; - if (size > 32) - size = 32; - new_hash = alloc_ftrace_hash(fls(size)); + size = fls(size); + if (size > FTRACE_HASH_MAX_BITS) + size = FTRACE_HASH_MAX_BITS; + new_hash = alloc_ftrace_hash(size); if (!new_hash) goto out_unlock; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c0f877d39a241d..67707ff28fc519 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1887,10 +1887,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) head_page = cpu_buffer->head_page; - /* If both the head and commit are on the reader_page then we are done. 
*/ - if (head_page == cpu_buffer->reader_page && - head_page == cpu_buffer->commit_page) + /* If the commit_buffer is the reader page, update the commit page */ + if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) { + cpu_buffer->commit_page = cpu_buffer->reader_page; + /* Nothing more to do, the only page is the reader page */ goto done; + } /* Iterate until finding the commit page */ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) { @@ -2847,6 +2849,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, if (nr_pages < 2) nr_pages = 2; + /* + * Keep CPUs from coming online while resizing to synchronize + * with new per CPU buffers being created. + */ + guard(cpus_read_lock)(); + /* prevent another thread from changing buffer sizes */ mutex_lock(&buffer->mutex); atomic_inc(&buffer->resizing); @@ -2891,7 +2899,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, cond_resched(); } - cpus_read_lock(); /* * Fire off all the required work handlers * We can't schedule on offline CPUs, but it's not necessary @@ -2931,7 +2938,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, cpu_buffer->nr_pages_to_update = 0; } - cpus_read_unlock(); } else { cpu_buffer = buffer->buffers[cpu_id]; @@ -2959,8 +2965,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, goto out_err; } - cpus_read_lock(); - /* Can't run something on an offline CPU. 
*/ if (!cpu_online(cpu_id)) rb_update_pages(cpu_buffer); @@ -2979,7 +2983,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size, } cpu_buffer->nr_pages_to_update = 0; - cpus_read_unlock(); } out: @@ -6762,7 +6765,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) old_size = buffer->subbuf_size; /* prevent another thread from changing buffer sizes */ - mutex_lock(&buffer->mutex); + guard(mutex)(&buffer->mutex); atomic_inc(&buffer->record_disabled); /* Make sure all commits have finished */ @@ -6867,7 +6870,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) } atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); return 0; @@ -6876,7 +6878,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) buffer->subbuf_size = old_size; atomic_dec(&buffer->record_disabled); - mutex_unlock(&buffer->mutex); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; @@ -7282,8 +7283,8 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) /* Check if any events were dropped */ missed_events = cpu_buffer->lost_events; - if (cpu_buffer->reader_page != cpu_buffer->commit_page) { - if (missed_events) { + if (missed_events) { + if (cpu_buffer->reader_page != cpu_buffer->commit_page) { struct buffer_data_page *bpage = reader->page; unsigned int commit; /* @@ -7304,13 +7305,23 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) local_add(RB_MISSED_STORED, &bpage->commit); } local_add(RB_MISSED_EVENTS, &bpage->commit); + } else if (!WARN_ONCE(cpu_buffer->reader_page == cpu_buffer->tail_page, + "Reader on commit with %ld missed events", + missed_events)) { + /* + * There shouldn't be any missed events if the tail_page + * is on the reader page. 
But if the tail page is not on the + * reader page and the commit_page is, that would mean that + * there's a commit_overrun (an interrupt preempted an + * addition of an event and then filled the buffer + * with new events). In this case it's not an + * error, but it should still be reported. + * + * TODO: Add missed events to the page for user space to know. + */ + pr_info("Ring buffer [%d] commit overrun lost %ld events at timestamp:%lld\n", + cpu, missed_events, cpu_buffer->reader_page->page->time_stamp); } - } else { - /* - * There really shouldn't be any missed events if the commit - * is on the reader page. - */ - WARN_ON_ONCE(missed_events); } cpu_buffer->lost_events = 0; diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 968c5c3b02464f..e4077500a91dbb 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -225,7 +225,12 @@ bool rv_is_nested_monitor(struct rv_monitor_def *mdef) */ bool rv_is_container_monitor(struct rv_monitor_def *mdef) { - struct rv_monitor_def *next = list_next_entry(mdef, list); + struct rv_monitor_def *next; + + if (list_is_last(&mdef->list, &rv_monitors_list)) + return false; + + next = list_next_entry(mdef, list); return next->parent == mdef->monitor || !mdef->monitor->enable; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b581e388a9d9f7..766cb3cd254e05 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6043,8 +6043,10 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) tscratch = tr->scratch; /* if there is no tscrach, module_delta must be NULL. */ module_delta = READ_ONCE(tr->module_delta); - if (!module_delta || tscratch->entries[0].mod_addr > addr) + if (!module_delta || !tscratch->nr_entries || + tscratch->entries[0].mod_addr > addr) { return addr + tr->text_delta; + } /* Note that entries must be sorted. 
*/ nr_entries = tscratch->nr_entries; @@ -6821,13 +6823,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* Copy the data into the page, so we can start over. */ ret = trace_seq_to_buffer(&iter->seq, page_address(spd.pages[i]), - trace_seq_used(&iter->seq)); + min((size_t)trace_seq_used(&iter->seq), + (size_t)PAGE_SIZE)); if (ret < 0) { __free_page(spd.pages[i]); break; } spd.partial[i].offset = 0; - spd.partial[i].len = trace_seq_used(&iter->seq); + spd.partial[i].len = ret; trace_seq_init(&iter->seq); } @@ -9806,6 +9809,7 @@ static int instance_mkdir(const char *name) return ret; } +#ifdef CONFIG_MMU static u64 map_pages(unsigned long start, unsigned long size) { unsigned long vmap_start, vmap_end; @@ -9828,6 +9832,12 @@ static u64 map_pages(unsigned long start, unsigned long size) return (u64)vmap_start; } +#else +static inline u64 map_pages(unsigned long start, unsigned long size) +{ + return 0; +} +#endif /** * trace_array_get_by_name - Create/Lookup a trace array, given its name. 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 79be1995db44c4..10ee434a9b755f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1772,6 +1772,9 @@ extern int event_enable_register_trigger(char *glob, extern void event_enable_unregister_trigger(char *glob, struct event_trigger_data *test, struct trace_event_file *file); +extern struct event_trigger_data * +trigger_data_alloc(struct event_command *cmd_ops, char *cmd, char *param, + void *private_data); extern void trigger_data_free(struct event_trigger_data *data); extern int event_trigger_init(struct event_trigger_data *data); extern int trace_event_trigger_enable_disable(struct trace_event_file *file, @@ -1798,11 +1801,6 @@ extern bool event_trigger_check_remove(const char *glob); extern bool event_trigger_empty_param(const char *param); extern int event_trigger_separate_filter(char *param_and_filter, char **param, char **filter, bool param_required); -extern struct event_trigger_data * -event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data); extern int event_trigger_parse_num(char *trigger, struct event_trigger_data *trigger_data); extern int event_trigger_set_filter(struct event_command *cmd_ops, diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index a322e4f249a503..5d64a18cacacc6 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -16,7 +16,7 @@ #include "trace_output.h" /* for trace_event_sem */ #include "trace_dynevent.h" -static DEFINE_MUTEX(dyn_event_ops_mutex); +DEFINE_MUTEX(dyn_event_ops_mutex); static LIST_HEAD(dyn_event_ops_list); bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call) @@ -116,6 +116,20 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type return ret; } +/* + * Locked version of event creation. The event creation must be protected by + * dyn_event_ops_mutex because of protecting trace_probe_log. 
+ */ +int dyn_event_create(const char *raw_command, struct dyn_event_operations *type) +{ + int ret; + + mutex_lock(&dyn_event_ops_mutex); + ret = type->create(raw_command); + mutex_unlock(&dyn_event_ops_mutex); + return ret; +} + static int create_dyn_event(const char *raw_command) { struct dyn_event_operations *ops; diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h index 936477a111d3e7..beee3f8d754444 100644 --- a/kernel/trace/trace_dynevent.h +++ b/kernel/trace/trace_dynevent.h @@ -100,6 +100,7 @@ void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos); void dyn_event_seq_stop(struct seq_file *m, void *v); int dyn_events_release_all(struct dyn_event_operations *type); int dyn_event_release(const char *raw_command, struct dyn_event_operations *type); +int dyn_event_create(const char *raw_command, struct dyn_event_operations *type); /* * for_each_dyn_event - iterate over the dyn_event list diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index ee40d4e6ad1cc8..4ef4df6623a8d3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -80,11 +80,11 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry, F_STRUCT( __field_struct( struct ftrace_graph_ent, graph_ent ) __field_packed( unsigned long, graph_ent, func ) - __field_packed( unsigned long, graph_ent, depth ) + __field_packed( unsigned int, graph_ent, depth ) __dynamic_array(unsigned long, args ) ), - F_printk("--> %ps (%lu)", (void *)__entry->func, __entry->depth) + F_printk("--> %ps (%u)", (void *)__entry->func, __entry->depth) ); #ifdef CONFIG_FUNCTION_GRAPH_RETADDR diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index c08355c3ef32b4..916555f0de811f 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -969,10 +969,13 @@ static int __trace_eprobe_create(int argc, const char *argv[]) goto error; } } + trace_probe_log_clear(); return ret; + parse_error: ret = -EINVAL; error: + 
trace_probe_log_clear(); trace_event_probe_cleanup(ep); return ret; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0993dfc1c5c165..2048560264bb48 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -808,7 +808,7 @@ static __always_inline char *test_string(char *str) kstr = ubuf->buffer; /* For safety, do not trust the string pointer */ - if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + if (strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE) < 0) return NULL; return kstr; } @@ -827,7 +827,7 @@ static __always_inline char *test_ustring(char *str) /* user space address? */ ustr = (char __user *)str; - if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + if (strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE) < 0) return NULL; return kstr; diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1260c23cfa5fc4..86fd06812cdab4 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5246,17 +5246,94 @@ hist_trigger_actions(struct hist_trigger_data *hist_data, } } +/* + * The hist_pad structure is used to save information to create + * a histogram from the histogram trigger. It's too big to store + * on the stack, so when the histogram trigger is initialized + * a percpu array of 4 hist_pad structures is allocated. + * This will cover every context from normal, softirq, irq and NMI + * in the very unlikely event that a tigger happens at each of + * these contexts and interrupts a currently active trigger. 
+ */ +struct hist_pad { + unsigned long entries[HIST_STACKTRACE_DEPTH]; + u64 var_ref_vals[TRACING_MAP_VARS_MAX]; + char compound_key[HIST_KEY_SIZE_MAX]; +}; + +static struct hist_pad __percpu *hist_pads; +static DEFINE_PER_CPU(int, hist_pad_cnt); +static refcount_t hist_pad_ref; + +/* One hist_pad for every context (normal, softirq, irq, NMI) */ +#define MAX_HIST_CNT 4 + +static int alloc_hist_pad(void) +{ + lockdep_assert_held(&event_mutex); + + if (refcount_read(&hist_pad_ref)) { + refcount_inc(&hist_pad_ref); + return 0; + } + + hist_pads = __alloc_percpu(sizeof(struct hist_pad) * MAX_HIST_CNT, + __alignof__(struct hist_pad)); + if (!hist_pads) + return -ENOMEM; + + refcount_set(&hist_pad_ref, 1); + return 0; +} + +static void free_hist_pad(void) +{ + lockdep_assert_held(&event_mutex); + + if (!refcount_dec_and_test(&hist_pad_ref)) + return; + + free_percpu(hist_pads); + hist_pads = NULL; +} + +static struct hist_pad *get_hist_pad(void) +{ + struct hist_pad *hist_pad; + int cnt; + + if (WARN_ON_ONCE(!hist_pads)) + return NULL; + + preempt_disable(); + + hist_pad = per_cpu_ptr(hist_pads, smp_processor_id()); + + if (this_cpu_read(hist_pad_cnt) == MAX_HIST_CNT) { + preempt_enable(); + return NULL; + } + + cnt = this_cpu_inc_return(hist_pad_cnt) - 1; + + return &hist_pad[cnt]; +} + +static void put_hist_pad(void) +{ + this_cpu_dec(hist_pad_cnt); + preempt_enable(); +} + static void event_hist_trigger(struct event_trigger_data *data, struct trace_buffer *buffer, void *rec, struct ring_buffer_event *rbe) { struct hist_trigger_data *hist_data = data->private_data; bool use_compound_key = (hist_data->n_keys > 1); - unsigned long entries[HIST_STACKTRACE_DEPTH]; - u64 var_ref_vals[TRACING_MAP_VARS_MAX]; - char compound_key[HIST_KEY_SIZE_MAX]; struct tracing_map_elt *elt = NULL; struct hist_field *key_field; + struct hist_pad *hist_pad; u64 field_contents; void *key = NULL; unsigned int i; @@ -5264,12 +5341,18 @@ static void event_hist_trigger(struct event_trigger_data 
*data, if (unlikely(!rbe)) return; - memset(compound_key, 0, hist_data->key_size); + hist_pad = get_hist_pad(); + if (!hist_pad) + return; + + memset(hist_pad->compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { key_field = hist_data->fields[i]; if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { + unsigned long *entries = hist_pad->entries; + memset(entries, 0, HIST_STACKTRACE_SIZE); if (key_field->field) { unsigned long *stack, n_entries; @@ -5293,26 +5376,31 @@ static void event_hist_trigger(struct event_trigger_data *data, } if (use_compound_key) - add_to_key(compound_key, key, key_field, rec); + add_to_key(hist_pad->compound_key, key, key_field, rec); } if (use_compound_key) - key = compound_key; + key = hist_pad->compound_key; if (hist_data->n_var_refs && - !resolve_var_refs(hist_data, key, var_ref_vals, false)) - return; + !resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, false)) + goto out; elt = tracing_map_insert(hist_data->map, key); if (!elt) - return; + goto out; - hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, var_ref_vals); + hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, hist_pad->var_ref_vals); - if (resolve_var_refs(hist_data, key, var_ref_vals, true)) - hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals); + if (resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, true)) { + hist_trigger_actions(hist_data, elt, buffer, rec, rbe, + key, hist_pad->var_ref_vals); + } hist_poll_wakeup(); + + out: + put_hist_pad(); } static void hist_trigger_stacktrace_print(struct seq_file *m, @@ -6157,6 +6245,9 @@ static int event_hist_trigger_init(struct event_trigger_data *data) { struct hist_trigger_data *hist_data = data->private_data; + if (alloc_hist_pad() < 0) + return -ENOMEM; + if (!data->ref && hist_data->attrs->name) save_named_trigger(hist_data->attrs->name, data); @@ -6201,6 +6292,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data) 
destroy_hist_data(hist_data); } + free_hist_pad(); } static const struct event_trigger_ops event_hist_trigger_ops = { @@ -6216,9 +6308,7 @@ static int event_hist_trigger_named_init(struct event_trigger_data *data) save_named_trigger(data->named_data->name, data); - event_hist_trigger_init(data->named_data); - - return 0; + return event_hist_trigger_init(data->named_data); } static void event_hist_trigger_named_free(struct event_trigger_data *data) @@ -6705,7 +6795,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, return PTR_ERR(hist_data); } - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, hist_data); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, hist_data); if (!trigger_data) { ret = -ENOMEM; goto out_free; diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 969f48742d72c6..33cfbd4ed76dda 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -370,7 +370,6 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, union trace_synth_field *data = &entry->fields[n_u64]; trace_seq_printf(s, print_fmt, se->fields[i]->name, - STR_VAR_LEN_MAX, (char *)entry + data->as_dynamic.offset, i == se->n_fields - 1 ? "" : " "); n_u64++; diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index b66b6d235d9130..c443ed7649a896 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -804,7 +804,7 @@ int event_trigger_separate_filter(char *param_and_filter, char **param, } /** - * event_trigger_alloc - allocate and init event_trigger_data for a trigger + * trigger_data_alloc - allocate and init event_trigger_data for a trigger * @cmd_ops: The event_command operations for the trigger * @cmd: The cmd string * @param: The param string @@ -815,14 +815,14 @@ int event_trigger_separate_filter(char *param_and_filter, char **param, * trigger_ops to assign to the event_trigger_data. 
@private_data can * also be passed in and associated with the event_trigger_data. * - * Use event_trigger_free() to free an event_trigger_data object. + * Use trigger_data_free() to free an event_trigger_data object. * * Return: The trigger_data object success, NULL otherwise */ -struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, - char *cmd, - char *param, - void *private_data) +struct event_trigger_data *trigger_data_alloc(struct event_command *cmd_ops, + char *cmd, + char *param, + void *private_data) { struct event_trigger_data *trigger_data; const struct event_trigger_ops *trigger_ops; @@ -989,13 +989,13 @@ event_trigger_parse(struct event_command *cmd_ops, return ret; ret = -ENOMEM; - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, file); if (!trigger_data) goto out; if (remove) { event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); - kfree(trigger_data); + trigger_data_free(trigger_data); ret = 0; goto out; } @@ -1022,7 +1022,7 @@ event_trigger_parse(struct event_command *cmd_ops, out_free: event_trigger_reset_filter(cmd_ops, trigger_data); - kfree(trigger_data); + trigger_data_free(trigger_data); goto out; } @@ -1560,7 +1560,7 @@ stacktrace_trigger(struct event_trigger_data *data, struct trace_event_file *file = data->private_data; if (file) - __trace_stack(file->tr, tracing_gen_ctx(), STACK_SKIP); + __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); else trace_dump_stack(STACK_SKIP); } @@ -1793,7 +1793,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops, enable_data->enable = enable; enable_data->file = event_enable_file; - trigger_data = event_trigger_alloc(cmd_ops, cmd, param, enable_data); + trigger_data = trigger_data_alloc(cmd_ops, cmd, param, enable_data); if (!trigger_data) { kfree(enable_data); goto out; diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index 5d7ca80173ea2b..b40fa59159ac52 100644 
--- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -919,9 +919,15 @@ static void __find_tracepoint_module_cb(struct tracepoint *tp, struct module *mo struct __find_tracepoint_cb_data *data = priv; if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { - data->tpoint = tp; - if (!data->mod) + /* If module is not specified, try getting module refcount. */ + if (!data->mod && mod) { + /* If failed to get refcount, ignore this tracepoint. */ + if (!try_module_get(mod)) + return; + data->mod = mod; + } + data->tpoint = tp; } } @@ -933,7 +939,11 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) data->tpoint = tp; } -/* Find a tracepoint from kernel and module. */ +/* + * Find a tracepoint from kernel and module. If the tracepoint is on the module, + * the module's refcount is incremented and returned as *@tp_mod. Thus, if it is + * not NULL, caller must call module_put(*tp_mod) after using the tracepoint. + */ static struct tracepoint *find_tracepoint(const char *tp_name, struct module **tp_mod) { @@ -962,7 +972,10 @@ static void reenable_trace_fprobe(struct trace_fprobe *tf) } } -/* Find a tracepoint from specified module. */ +/* + * Find a tracepoint from specified module. In this case, this does not get the + * module's refcount. The caller must ensure the module is not freed. + */ static struct tracepoint *find_tracepoint_in_module(struct module *mod, const char *tp_name) { @@ -1169,11 +1182,6 @@ static int trace_fprobe_create_internal(int argc, const char *argv[], if (is_tracepoint) { ctx->flags |= TPARG_FL_TPOINT; tpoint = find_tracepoint(symbol, &tp_mod); - /* lock module until register this tprobe.
*/ - if (tp_mod && !try_module_get(tp_mod)) { - tpoint = NULL; - tp_mod = NULL; - } if (tpoint) { ctx->funcname = kallsyms_lookup( (unsigned long)tpoint->probestub, diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 98ccf3f00c519d..4e37a0f6aaa384 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -633,11 +633,7 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip, static __always_inline void trace_stack(struct trace_array *tr) { - unsigned int trace_ctx; - - trace_ctx = tracing_gen_ctx(); - - __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); + __trace_stack(tr, tracing_gen_ctx_dec(), FTRACE_STACK_SKIP); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2f077d4158e5f6..0c357a89c58e01 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -880,8 +880,6 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr if (print_retval || print_retaddr) trace_seq_puts(s, " /*"); - else - trace_seq_putc(s, '\n'); } else { print_retaddr = false; trace_seq_printf(s, "} /* %ps", func); @@ -899,7 +897,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr } if (!entry || print_retval || print_retaddr) - trace_seq_puts(s, " */\n"); + trace_seq_puts(s, " */"); } #else @@ -975,7 +973,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, } else trace_seq_puts(s, "();"); } - trace_seq_printf(s, "\n"); + trace_seq_putc(s, '\n'); print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -1313,10 +1311,11 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s, * that if the funcgraph-tail option is enabled. 
*/ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) - trace_seq_puts(s, "}\n"); + trace_seq_puts(s, "}"); else - trace_seq_printf(s, "} /* %ps */\n", (void *)func); + trace_seq_printf(s, "} /* %ps */", (void *)func); } + trace_seq_putc(s, '\n'); /* Overrun */ if (flags & TRACE_GRAPH_PRINT_OVERRUN) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 2703b96d899064..3e5c47b6d7b290 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1089,7 +1089,7 @@ static int create_or_delete_trace_kprobe(const char *raw_command) if (raw_command[0] == '-') return dyn_event_release(raw_command, &trace_kprobe_ops); - ret = trace_kprobe_create(raw_command); + ret = dyn_event_create(raw_command, &trace_kprobe_ops); return ret == -ECANCELED ? -EINVAL : ret; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index fee40ffbd49061..b9ab06c9954323 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1042,11 +1042,12 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, struct trace_event_call *call; struct list_head *head; + lockdep_assert_held_read(&trace_event_sem); + /* ftrace defined events have separate call structures */ if (event->type <= __TRACE_LAST_TYPE) { bool found = false; - down_read(&trace_event_sem); list_for_each_entry(call, &ftrace_events, list) { if (call->event.type == event->type) { found = true; @@ -1056,7 +1057,6 @@ enum print_line_t print_event_fields(struct trace_iterator *iter, if (call->event.type > __TRACE_LAST_TYPE) break; } - up_read(&trace_event_sem); if (!found) { trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); goto out; diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 2eeecb6c95eea5..424751cdf31f9f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -154,9 +154,12 @@ static const struct fetch_type *find_fetch_type(const char *type, unsigned long } static struct trace_probe_log 
trace_probe_log; +extern struct mutex dyn_event_ops_mutex; void trace_probe_log_init(const char *subsystem, int argc, const char **argv) { + lockdep_assert_held(&dyn_event_ops_mutex); + trace_probe_log.subsystem = subsystem; trace_probe_log.argc = argc; trace_probe_log.argv = argv; @@ -165,11 +168,15 @@ void trace_probe_log_init(const char *subsystem, int argc, const char **argv) void trace_probe_log_clear(void) { + lockdep_assert_held(&dyn_event_ops_mutex); + memset(&trace_probe_log, 0, sizeof(trace_probe_log)); } void trace_probe_log_set_index(int index) { + lockdep_assert_held(&dyn_event_ops_mutex); + trace_probe_log.index = index; } @@ -178,6 +185,8 @@ void __trace_probe_log_err(int offset, int err_type) char *command, *p; int i, len = 0, pos = 0; + lockdep_assert_held(&dyn_event_ops_mutex); + if (!trace_probe_log.argv) return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3386439ec9f674..35cf76c75dd766 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -741,7 +741,7 @@ static int create_or_delete_trace_uprobe(const char *raw_command) if (raw_command[0] == '-') return dyn_event_release(raw_command, &trace_uprobe_ops); - ret = trace_uprobe_create(raw_command); + ret = dyn_event_create(raw_command, &trace_uprobe_ops); return ret == -ECANCELED ? 
-EINVAL : ret; } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 682f40d5632d44..434a25f7b2edb2 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,6 +22,13 @@ #include <linux/bsearch.h> #include <linux/sort.h> +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 2ef2e1b8009165..2f844c279a3e01 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(vhost_task_stop); * @arg: data to be passed to fn and handled_kill * @name: the thread's name * - * This returns a specialized task for use by the vhost layer or NULL on + * This returns a specialized task for use by the vhost layer or ERR_PTR() on * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ diff --git a/lib/Kconfig b/lib/Kconfig index 61cce0686b53eb..6c1b8f1842678c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -139,27 +139,22 @@ config TRACE_MMIO_ACCESS source "lib/crypto/Kconfig" config CRC_CCITT - tristate "CRC-CCITT functions" + tristate help - This option is provided for the case where no in-kernel-tree - modules require CRC-CCITT functions, but a module built outside - the kernel tree does. Such modules that use library CRC-CCITT - functions require M here. + The CRC-CCITT library functions. Select this if your module uses any + of the functions from <linux/crc-ccitt.h>. config CRC16 - tristate "CRC16 functions" + tristate help - This option is provided for the case where no in-kernel-tree - modules require CRC16 functions, but a module built outside - the kernel tree does. Such modules that use library CRC16 - functions require M here. + The CRC16 library functions. Select this if your module uses any of + the functions from <linux/crc16.h>.
config CRC_T10DIF - tristate "CRC calculation for the T10 Data Integrity Field" + tristate help - This option is only needed if a module that's not in the - kernel tree needs to calculate CRC checks for use with the - SCSI data integrity subsystem. + The CRC-T10DIF library functions. Select this if your module uses + any of the functions from <linux/crc-t10dif.h>. config ARCH_HAS_CRC_T10DIF bool @@ -169,22 +164,17 @@ config CRC_T10DIF_ARCH default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS config CRC_ITU_T - tristate "CRC ITU-T V.41 functions" + tristate help - This option is provided for the case where no in-kernel-tree - modules require CRC ITU-T V.41 functions, but a module built outside - the kernel tree does. Such modules that use library CRC ITU-T V.41 - functions require M here. + The CRC-ITU-T library functions. Select this if your module uses + any of the functions from <linux/crc-itu-t.h>. config CRC32 - tristate "CRC32/CRC32c functions" - default y + tristate select BITREVERSE help - This option is provided for the case where no in-kernel-tree - modules require CRC32/CRC32c functions, but a module built outside - the kernel tree does. Such modules that use library CRC32/CRC32c - functions require M here. + The CRC32 library functions. Select this if your module uses any of + the functions from <linux/crc32.h> or <linux/crc32c.h>. config ARCH_HAS_CRC32 bool @@ -195,6 +185,9 @@ config CRC32_ARCH config CRC64 tristate + help + The CRC64 library functions. Select this if your module uses any of + the functions from <linux/crc64.h>. config ARCH_HAS_CRC64 bool @@ -205,19 +198,21 @@ config CRC64_ARCH config CRC4 tristate + help + The CRC4 library functions. Select this if your module uses any of + the functions from <linux/crc4.h>. config CRC7 tristate - -config LIBCRC32C - tristate - select CRC32 help - This option just selects CRC32 and is provided for compatibility - purposes until the users are updated to select CRC32 directly. + The CRC7 library functions. Select this if your module uses any of + the functions from <linux/crc7.h>.
config CRC8 tristate + help + The CRC8 library functions. Select this if your module uses any of + the functions from <linux/crc8.h>. config CRC_OPTIMIZATIONS bool "Enable optimized CRC implementations" if EXPERT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9fe4d8dfe57829..f9051ab610d543 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3290,7 +3290,7 @@ config GCD_KUNIT_TEST config PRIME_NUMBERS_KUNIT_TEST tristate "Prime number generator test" if !KUNIT_ALL_TESTS depends on KUNIT - select PRIME_NUMBERS + depends on PRIME_NUMBERS default KUNIT_ALL_TESTS help This option enables the KUnit test suite for the {is,next}_prime_number diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 4216b3a4ff218f..96cd896684676d 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -118,7 +118,8 @@ config UBSAN_UNREACHABLE config UBSAN_INTEGER_WRAP bool "Perform checking for integer arithmetic wrap-around" - default UBSAN + # This is very experimental so drop the next line if you really want it + depends on BROKEN depends on !COMPILE_TEST depends on $(cc-option,-fsanitize-undefined-ignore-overflow-pattern=all) depends on $(cc-option,-fsanitize=signed-integer-overflow) diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 1d893e31361493..c7f602fa7b23fc 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -350,18 +350,28 @@ static bool needs_section_mem(struct module *mod, unsigned long size) return size >= sizeof(struct alloc_tag); } -static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) +static bool clean_unused_counters(struct alloc_tag *start_tag, + struct alloc_tag *end_tag) { - while (from <= to) { + struct alloc_tag *tag; + bool ret = true; + + for (tag = start_tag; tag <= end_tag; tag++) { struct alloc_tag_counters counter; - counter = alloc_tag_read(from); - if (counter.bytes) - return from; - from++; + if (!tag->counters) + continue; + + counter = alloc_tag_read(tag); + if (!counter.bytes) { + free_percpu(tag->counters); + 
tag->counters = NULL; + } else { + ret = false; + } } - return NULL; + return ret; } /* Called with mod_area_mt locked */ @@ -371,12 +381,16 @@ static void clean_unused_module_areas_locked(void) struct module *val; mas_for_each(&mas, val, module_tags.size) { + struct alloc_tag *start_tag; + struct alloc_tag *end_tag; + if (val != &unloaded_mod) continue; /* Release area if all tags are unused */ - if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), - (struct alloc_tag *)(module_tags.start_addr + mas.last))) + start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); + end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); + if (clean_unused_counters(start_tag, end_tag)) mas_erase(&mas); } } @@ -422,11 +436,20 @@ static int vm_module_tags_populate(void) unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); unsigned long more_pages; - unsigned long nr; + unsigned long nr = 0; more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; - nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, - NUMA_NO_NODE, more_pages, next_page); + while (nr < more_pages) { + unsigned long allocated; + + allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages - nr, next_page + nr); + + if (!allocated) + break; + nr += allocated; + } + if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { @@ -552,7 +575,8 @@ static void *reserve_module_tags(struct module *mod, unsigned long size, static void release_module_tags(struct module *mod, bool used) { MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); - struct alloc_tag *tag; + struct alloc_tag *start_tag; + struct alloc_tag *end_tag; struct module *val; mas_lock(&mas); @@ -566,15 +590,22 @@ static void release_module_tags(struct module *mod, bool used) if (!used) goto release_area; - /* Find out if the area 
is used */ - tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), - (struct alloc_tag *)(module_tags.start_addr + mas.last)); - if (tag) { - struct alloc_tag_counters counter = alloc_tag_read(tag); + start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); + end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); + if (!clean_unused_counters(start_tag, end_tag)) { + struct alloc_tag *tag; + + for (tag = start_tag; tag <= end_tag; tag++) { + struct alloc_tag_counters counter; + + if (!tag->counters) + continue; - pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", - tag->ct.filename, tag->ct.lineno, tag->ct.modname, - tag->ct.function, counter.bytes); + counter = alloc_tag_read(tag); + pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", + tag->ct.filename, tag->ct.lineno, tag->ct.modname, + tag->ct.function, counter.bytes); + } } else { used = false; } @@ -587,6 +618,34 @@ static void release_module_tags(struct module *mod, bool used) mas_unlock(&mas); } +static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) +{ + /* Allocate module alloc_tag percpu counters */ + struct alloc_tag *start_tag; + struct alloc_tag *stop_tag; + struct alloc_tag *tag; + + if (!mod) + return; + + start_tag = ct_to_alloc_tag(start); + stop_tag = ct_to_alloc_tag(stop); + for (tag = start_tag; tag < stop_tag; tag++) { + WARN_ON(tag->counters); + tag->counters = alloc_percpu(struct alloc_tag_counters); + if (!tag->counters) { + while (--tag >= start_tag) { + free_percpu(tag->counters); + tag->counters = NULL; + } + shutdown_mem_profiling(true); + pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. 
Memory allocation profiling is disabled!\n", + mod->name); + break; + } + } +} + static void replace_module(struct module *mod, struct module *new_mod) { MA_STATE(mas, &mod_area_mt, 0, module_tags.size); @@ -748,6 +807,7 @@ static int __init alloc_tag_init(void) .needs_section_mem = needs_section_mem, .alloc_section_mem = reserve_module_tags, .free_section_mem = release_module_tags, + .module_load = load_module, .module_replaced = replace_module, #endif }; diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 13da529e2e7247..5738ae286b41ed 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -518,4 +518,5 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, } EXPORT_SYMBOL_GPL(asn1_ber_decoder); +MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); MODULE_LICENSE("GPL"); diff --git a/lib/codetag.c b/lib/codetag.c index 42aadd6c145499..de332e98d6f5b5 100644 --- a/lib/codetag.c +++ b/lib/codetag.c @@ -194,7 +194,7 @@ static int codetag_module_init(struct codetag_type *cttype, struct module *mod) if (err >= 0) { cttype->count += range_size(cttype, &range); if (cttype->desc.module_load) - cttype->desc.module_load(cttype, cmod); + cttype->desc.module_load(mod, range.start, range.stop); } up_write(&cttype->mod_lock); @@ -333,7 +333,8 @@ void codetag_unload_module(struct module *mod) } if (found) { if (cttype->desc.module_unload) - cttype->desc.module_unload(cttype, cmod); + cttype->desc.module_unload(cmod->mod, + cmod->range.start, cmod->range.stop); cttype->count -= range_size(cttype, &cmod->range); idr_remove(&cttype->mod_idr, mod_id); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8c7fdb7d8c8fa3..9ce83ab71bacd8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -820,7 +820,7 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, size_t size = i->count; do { - size_t len = bvec->bv_len; + size_t len = bvec->bv_len - skip; if (len > size) len = size; @@ -1191,7 +1191,7 @@ static ssize_t 
__iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) { - struct folio *folio = page_folio(page); + struct folio *folio = page_folio(page + k); p[k] = page + k; if (!folio_test_slab(folio)) folio_get(folio); diff --git a/lib/kunit/static_stub.c b/lib/kunit/static_stub.c index 92b2cccd5e7633..484fd85251b415 100644 --- a/lib/kunit/static_stub.c +++ b/lib/kunit/static_stub.c @@ -96,7 +96,7 @@ void __kunit_activate_static_stub(struct kunit *test, /* If the replacement address is NULL, deactivate the stub. */ if (!replacement_addr) { - kunit_deactivate_static_stub(test, replacement_addr); + kunit_deactivate_static_stub(test, real_fn_addr); return; } diff --git a/lib/string.c b/lib/string.c index eb4486ed40d259..b632c71df1a506 100644 --- a/lib/string.c +++ b/lib/string.c @@ -119,6 +119,7 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) if (count == 0 || WARN_ON_ONCE(count > INT_MAX)) return -E2BIG; +#ifndef CONFIG_DCACHE_WORD_ACCESS #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS /* * If src is unaligned, don't cross a page boundary, @@ -133,12 +134,14 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) /* If src or dest is unaligned, don't do word-at-a-time. */ if (((long) dest | (long) src) & (sizeof(long) - 1)) max = 0; +#endif #endif /* - * read_word_at_a_time() below may read uninitialized bytes after the - * trailing zero and use them in comparisons. Disable this optimization - * under KMSAN to prevent false positive reports. + * load_unaligned_zeropad() or read_word_at_a_time() below may read + * uninitialized bytes after the trailing zero and use them in + * comparisons. Disable this optimization under KMSAN to prevent + * false positive reports. 
*/ if (IS_ENABLED(CONFIG_KMSAN)) max = 0; @@ -146,7 +149,11 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; +#ifdef CONFIG_DCACHE_WORD_ACCESS + c = load_unaligned_zeropad(src+res); +#else c = read_word_at_a_time(src+res); +#endif if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 8772e5edaa4fa7..a4b6f52b9c5780 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -77,18 +77,22 @@ static void test_ubsan_shift_out_of_bounds(void) static void test_ubsan_out_of_bounds(void) { - volatile int i = 4, j = 5, k = -1; - volatile char above[4] = { }; /* Protect surrounding memory. */ - volatile int arr[4]; - volatile char below[4] = { }; /* Protect surrounding memory. */ + int i = 4, j = 4, k = -1; + volatile struct { + char above[4]; /* Protect surrounding memory. */ + int arr[4]; + char below[4]; /* Protect surrounding memory. 
*/ + } data; - above[0] = below[0]; + OPTIMIZER_HIDE_VAR(i); + OPTIMIZER_HIDE_VAR(j); + OPTIMIZER_HIDE_VAR(k); UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "above"); - arr[j] = i; + data.arr[j] = i; UBSAN_TEST(CONFIG_UBSAN_BOUNDS, "below"); - arr[k] = i; + data.arr[k] = i; } enum ubsan_test_enum { diff --git a/lib/tests/slub_kunit.c b/lib/tests/slub_kunit.c index d47c472b05201b..848b682a2d70ed 100644 --- a/lib/tests/slub_kunit.c +++ b/lib/tests/slub_kunit.c @@ -325,4 +325,5 @@ static struct kunit_suite test_suite = { }; kunit_test_suite(test_suite); +MODULE_DESCRIPTION("Kunit tests for slub allocator"); MODULE_LICENSE("GPL"); diff --git a/lib/tests/usercopy_kunit.c b/lib/tests/usercopy_kunit.c index 77fa00a13df775..80f8abe10968c1 100644 --- a/lib/tests/usercopy_kunit.c +++ b/lib/tests/usercopy_kunit.c @@ -27,6 +27,7 @@ !defined(CONFIG_MICROBLAZE) && \ !defined(CONFIG_NIOS2) && \ !defined(CONFIG_PPC32) && \ + !defined(CONFIG_SPARC32) && \ !defined(CONFIG_SUPERH)) # define TEST_U64 #endif diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 9308bcfb2ad50c..dfb4f2358cabfe 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -165,4 +165,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) } EXPORT_SYMBOL(ucs2_as_utf8); +MODULE_DESCRIPTION("UCS2 string handling"); MODULE_LICENSE("GPL v2"); diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c index 9720114c067210..b8996d90e8bcd8 100644 --- a/lib/zlib_inflate/inflate_syms.c +++ b/lib/zlib_inflate/inflate_syms.c @@ -18,4 +18,5 @@ EXPORT_SYMBOL(zlib_inflateEnd); EXPORT_SYMBOL(zlib_inflateReset); EXPORT_SYMBOL(zlib_inflateIncomp); EXPORT_SYMBOL(zlib_inflate_blob); +MODULE_DESCRIPTION("Data decompression using the deflation algorithm"); MODULE_LICENSE("GPL"); diff --git a/mm/Kconfig b/mm/Kconfig index e113f713b49387..825b5932debcd1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -462,6 +462,69 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP config ARCH_WANT_HUGETLB_VMEMMAP_PREINIT bool 
+config ANON_MIN_RATIO + int "Default value for vm.anon_min_ratio" + depends on SYSCTL + range 0 100 + default 1 + help + This option sets the default value for vm.anon_min_ratio sysctl knob. + + The vm.anon_min_ratio sysctl knob provides *hard* protection of + anonymous pages. The anonymous pages on the current node won't be + reclaimed under any conditions when their amount is below + vm.anon_min_ratio. This knob may be used to prevent excessive swap + thrashing when anonymous memory is low (for example, when memory is + going to be overfilled by compressed data of zram module). + + Setting this value too high (close to MemTotal) can result in + inability to swap and can lead to early OOM under memory pressure. + +config CLEAN_LOW_RATIO + int "Default value for vm.clean_low_ratio" + depends on SYSCTL + range 0 100 + default 15 + help + This option sets the default value for vm.clean_low_ratio sysctl knob. + + The vm.clean_low_ratio sysctl knob provides *best-effort* + protection of clean file pages. The file pages on the current node + won't be reclaimed under memory pressure when the amount of clean file + pages is below vm.clean_low_ratio *unless* we threaten to OOM. + Protection of clean file pages using this knob may be used when + swapping is still possible to + - prevent disk I/O thrashing under memory pressure; + - improve performance in disk cache-bound tasks under memory + pressure. + + Setting it to a high value may result in an early eviction of anonymous + pages into the swap space by attempting to hold the protected amount + of clean file pages in memory. + +config CLEAN_MIN_RATIO + int "Default value for vm.clean_min_ratio" + depends on SYSCTL + range 0 100 + default 4 + help + This option sets the default value for vm.clean_min_ratio sysctl knob. + + The vm.clean_min_ratio sysctl knob provides *hard* protection of + clean file pages.
The file pages on the current node won't be + reclaimed under memory pressure when the amount of clean file pages is + below vm.clean_min_ratio. Hard protection of clean file pages using + this knob may be used to + - prevent disk I/O thrashing under memory pressure even with no free + swap space; + - improve performance in disk cache-bound tasks under memory + pressure; + - avoid high latency and prevent livelock in near-OOM conditions. + + Setting it to a high value may result in an early out-of-memory condition + due to the inability to reclaim the protected amount of clean file pages + when other types of pages cannot be reclaimed. + config HAVE_MEMBLOCK_PHYS_MAP bool @@ -654,7 +717,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION - default 0 if PREEMPT_RT + default 0 if PREEMPT_RT || CACHY default 1 # diff --git a/mm/cma.c b/mm/cma.c index b06d5fe73399f2..c04be488b09929 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -35,7 +35,7 @@ struct cma cma_areas[MAX_CMA_AREAS]; unsigned int cma_area_count; -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, @@ -370,7 +370,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, phys_addr_t align, unsigned int order_per_bit, const char *name, struct cma **res_cma, int nid) { - phys_addr_t start, end; + phys_addr_t start = 0, end; phys_addr_t size, sizesum, sizeleft; struct cma_init_memrange *mrp, *mlp, *failed; struct cma_memrange *cmrp; @@ -384,7 +384,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, /* * First, try it the normal way, producing just one range.
*/ - ret = __cma_declare_contiguous_nid(0, total_size, 0, align, + ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, order_per_bit, false, name, res_cma, nid); if (ret != -ENOMEM) goto out; @@ -580,7 +580,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, { int ret; - ret = __cma_declare_contiguous_nid(base, size, limit, alignment, + ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, order_per_bit, fixed, name, res_cma, nid); if (ret != 0) pr_err("Failed to reserve %ld MiB\n", @@ -592,14 +592,14 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, return ret; } -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); - phys_addr_t highmem_start; + phys_addr_t highmem_start, base = *basep; int ret; /* @@ -608,7 +608,10 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t base, * complain. Find the boundary by adding one to the last valid * address. 
*/ - highmem_start = __pa(high_memory - 1) + 1; + if (IS_ENABLED(CONFIG_HIGHMEM)) + highmem_start = __pa(high_memory - 1) + 1; + else + highmem_start = memblock_end_of_DRAM(); pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); @@ -722,12 +725,15 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t base, } ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); - if (ret) + if (ret) { memblock_phys_free(base, size); + return ret; + } (*res_cma)->nid = nid; + *basep = base; - return ret; + return 0; } static void cma_debug_show_areas(struct cma *cma) diff --git a/mm/compaction.c b/mm/compaction.c index 139f00c0308a3d..2070b1e5106f92 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -981,13 +981,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } if (PageHuge(page)) { + const unsigned int order = compound_order(page); /* * skip hugetlbfs if we are not compacting for pages * bigger than its order. THPs and other compound pages * are handled below. */ if (!cc->alloc_contig) { - const unsigned int order = compound_order(page); if (order <= MAX_PAGE_ORDER) { low_pfn += (1UL << order) - 1; @@ -1011,8 +1011,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Do not report -EBUSY down the chain */ if (ret == -EBUSY) ret = 0; - low_pfn += compound_nr(page) - 1; - nr_scanned += compound_nr(page) - 1; + low_pfn += (1UL << order) - 1; + nr_scanned += (1UL << order) - 1; goto isolate_fail; } @@ -1923,7 +1923,11 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE * aggressively the kernel should compact memory in the * background. It takes values in the range [0, 100]. 
*/ +#ifdef CONFIG_CACHY +static unsigned int __read_mostly sysctl_compaction_proactiveness; +#else static unsigned int __read_mostly sysctl_compaction_proactiveness = 20; +#endif static int sysctl_extfrag_threshold = 500; static int __read_mostly sysctl_compact_memory; diff --git a/mm/execmem.c b/mm/execmem.c index e6c4f5076ca8d8..6f7a2653b280ed 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -254,6 +254,34 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) return ptr; } +static bool execmem_cache_rox = false; + +void execmem_cache_make_ro(void) +{ + struct maple_tree *free_areas = &execmem_cache.free_areas; + struct maple_tree *busy_areas = &execmem_cache.busy_areas; + MA_STATE(mas_free, free_areas, 0, ULONG_MAX); + MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); + struct mutex *mutex = &execmem_cache.mutex; + void *area; + + execmem_cache_rox = true; + + mutex_lock(mutex); + + mas_for_each(&mas_free, area, ULONG_MAX) { + unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; + set_memory_ro(mas_free.index, pages); + } + + mas_for_each(&mas_busy, area, ULONG_MAX) { + unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; + set_memory_ro(mas_busy.index, pages); + } + + mutex_unlock(mutex); +} + static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; @@ -274,9 +302,15 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - err = set_memory_rox((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; + if (execmem_cache_rox) { + err = set_memory_rox((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; + } else { + err = set_memory_x((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; + } err = execmem_cache_add(p, alloc_size); if (err) diff --git a/mm/filemap.c b/mm/filemap.c index 
b5e784f34d980b..6af6d8f2929ce4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio) } EXPORT_SYMBOL(folio_wait_private_2_killable); +static void filemap_end_dropbehind(struct folio *folio) +{ + struct address_space *mapping = folio->mapping; + + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + + if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio)) + folio_unmap_invalidate(mapping, folio, 0); +} + /* * If folio was marked as dropbehind, then pages should be dropped when writeback * completes. Do that now. If we fail, it's likely because of a big folio - @@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio) * invalidation in that case. */ if (in_task() && folio_trylock(folio)) { - if (folio->mapping) - folio_unmap_invalidate(folio->mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -2244,6 +2253,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, *start = folio->index + nr; goto out; } + xas_advance(&xas, folio_next_index(folio) - 1); continue; put_folio: folio_put(folio); @@ -2634,8 +2644,7 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio) return (pos1 >> shift == pos2 >> shift); } -static void filemap_end_dropbehind_read(struct address_space *mapping, - struct folio *folio) +static void filemap_end_dropbehind_read(struct folio *folio) { if (!folio_test_dropbehind(folio)) return; @@ -2643,7 +2652,7 @@ static void filemap_end_dropbehind_read(struct address_space *mapping, return; if (folio_trylock(folio)) { if (folio_test_clear_dropbehind(folio)) - folio_unmap_invalidate(mapping, folio, 0); + filemap_end_dropbehind(folio); folio_unlock(folio); } } @@ -2764,7 +2773,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; - filemap_end_dropbehind_read(mapping, 
folio); + filemap_end_dropbehind_read(folio); folio_put(folio); } folio_batch_init(&fbatch); diff --git a/mm/gup.c b/mm/gup.c index 92351e2fa876bf..84461d384ae2be 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2207,8 +2207,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) } while (start != end); mmap_read_unlock(mm); - if (size > (unsigned long)uaddr - start) - return size - ((unsigned long)uaddr - start); + if (size > start - (unsigned long)uaddr) + return size - (start - (unsigned long)uaddr); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2a47682d1ab777..48615051a8cf16 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -64,7 +64,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1<mmap_lock writer semaphore held. - * This function should be only used by move_vma() and operate on + * This function should be only used by mremap and operate on * same sized vma. It should never come here with last ref on the * reservation. */ @@ -2271,7 +2271,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h, * as surplus_pages, otherwise it might confuse * persistent_huge_pages() momentarily. */ - __prep_account_new_huge_page(h, nid); + __prep_account_new_huge_page(h, folio_nid(folio)); /* * We could have raced with the pool size change. @@ -2949,12 +2949,20 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn) while (start_pfn < end_pfn) { folio = pfn_folio(start_pfn); + + /* + * The folio might have been dissolved from under our feet, so make sure + * to carefully check the state under the lock. 
+ */ + spin_lock_irq(&hugetlb_lock); if (folio_test_hugetlb(folio)) { h = folio_hstate(folio); } else { + spin_unlock_irq(&hugetlb_lock); start_pfn++; continue; } + spin_unlock_irq(&hugetlb_lock); if (!folio_ref_count(folio)) { ret = alloc_and_dissolve_hugetlb_folio(h, folio, @@ -3010,7 +3018,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct hugepage_subpool *spool = subpool_vma(vma); struct hstate *h = hstate_vma(vma); struct folio *folio; - long retval, gbl_chg; + long retval, gbl_chg, gbl_reserve; map_chg_state map_chg; int ret, idx; struct hugetlb_cgroup *h_cg = NULL; @@ -3163,8 +3171,16 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h), h_cg); out_subpool_put: - if (map_chg) - hugepage_subpool_put_pages(spool, 1); + /* + * put page to subpool iff the quota of subpool's rsv_hpages is used + * during hugepage_subpool_get_pages. + */ + if (map_chg && !gbl_chg) { + gbl_reserve = hugepage_subpool_put_pages(spool, 1); + hugetlb_acct_memory(h, -gbl_reserve); + } + + out_end_reservation: if (map_chg != MAP_CHG_ENFORCED) vma_end_reservation(h, vma, addr); @@ -3825,6 +3841,7 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed, static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, nodemask_t *nodes_allowed) { + unsigned long persistent_free_count; unsigned long min_count; unsigned long allocated; struct folio *folio; @@ -3959,8 +3976,24 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. 
+ * + * min_count is the expected number of persistent pages, we + * shouldn't calculate min_count by using + * resv_huge_pages + persistent_huge_pages() - free_huge_pages, + * because there may exist free surplus huge pages, and this will + * lead to subtracting twice. Free surplus huge pages come from HVO + * failing to restore vmemmap, see comments in the callers of + * hugetlb_vmemmap_restore_folio(). Thus, we should calculate + * persistent free count first. */ - min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; + persistent_free_count = h->free_huge_pages; + if (h->free_huge_pages > persistent_huge_pages(h)) { + if (h->free_huge_pages > h->surplus_huge_pages) + persistent_free_count -= h->surplus_huge_pages; + else + persistent_free_count = 0; + } + min_count = h->resv_huge_pages + persistent_huge_pages(h) - persistent_free_count; min_count = max(count, min_count); try_to_free_low(h, min_count, nodes_allowed); @@ -4017,10 +4050,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, list_for_each_entry_safe(folio, next, src_list, lru) { int i; + bool cma; if (folio_test_hugetlb_vmemmap_optimized(folio)) continue; + cma = folio_test_hugetlb_cma(folio); + list_del(&folio->lru); split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst)); @@ -4036,6 +4072,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, new_folio->mapping = NULL; init_new_hugetlb_folio(dst, new_folio); + /* Copy the CMA flag so that it is freed correctly */ + if (cma) + folio_set_hugetlb_cma(new_folio); list_add(&new_folio->lru, &dst_list); } } @@ -4630,7 +4669,7 @@ static void __init hugetlb_sysfs_init(void) err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, hstate_kobjs, &hstate_attr_group); if (err) - pr_err("HugeTLB: Unable to add hstate %s", h->name); + pr_err("HugeTLB: Unable to add hstate %s\n", h->name); } #ifdef CONFIG_NUMA @@ -7216,7 +7255,7 @@ bool hugetlb_reserve_pages(struct inode 
*inode, struct vm_area_struct *vma, vm_flags_t vm_flags) { - long chg = -1, add = -1; + long chg = -1, add = -1, spool_resv, gbl_resv; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); struct resv_map *resv_map; @@ -7351,8 +7390,16 @@ bool hugetlb_reserve_pages(struct inode *inode, return true; out_put_pages: - /* put back original number of pages, chg */ - (void)hugepage_subpool_put_pages(spool, chg); + spool_resv = chg - gbl_reserve; + if (spool_resv) { + /* put sub pool's reservation back, chg - gbl_reserve */ + gbl_resv = hugepage_subpool_put_pages(spool, spool_resv); + /* + * subpool's reserved pages can not be put back due to race, + * return to hstate. + */ + hugetlb_acct_memory(h, -gbl_resv); + } out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), chg * pages_per_huge_page(h), h_cg); @@ -7892,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), ALIGN_DOWN(vma->vm_end, PUD_SIZE)); } + +/* + * For hugetlb, mremap() is an odd edge case - while the VMA copying is + * performed, we permit both the old and new VMAs to reference the same + * reservation. + * + * We fix this up after the operation succeeds, or if a newly allocated VMA + * is closed as a result of a failure to allocate memory. + */ +void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + if (is_vm_hugetlb_page(vma)) + clear_vma_resv_huge_pages(vma); +} diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 9a99dfa3c49583..27245e86df2500 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -238,11 +238,11 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr, * struct page, the special metadata (e.g. page->flags or page->mapping) * cannot copy to the tail struct page structs. The invalid value will be * checked in the free_tail_page_prepare(). In order to avoid the message - * of "corrupted mapping in tail page". 
We need to reset at least 3 (one - * head struct page struct and two tail struct page structs) struct page + * of "corrupted mapping in tail page". We need to reset at least 4 (one + * head struct page struct and three tail struct page structs) struct page * structs. */ -#define NR_RESET_STRUCT_PAGE 3 +#define NR_RESET_STRUCT_PAGE 4 static inline void reset_struct_pages(struct page *start) { diff --git a/mm/internal.h b/mm/internal.h index 50c2f590b2d04b..5c7a2b43ad7620 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, bool *any_writable, bool *any_young, bool *any_dirty) { - unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); - const pte_t *end_ptep = start_ptep + max_nr; pte_t expected_pte, *ptep; bool writable, young, dirty; - int nr; + int nr, cur_nr; if (any_writable) *any_writable = false; @@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio); + /* Limit max_nr to the actual remaining PFNs in the folio we could batch. */ + max_nr = min_t(unsigned long, max_nr, + folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte)); + nr = pte_batch_hint(start_ptep, pte); expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags); ptep = start_ptep + nr; - while (ptep < end_ptep) { + while (nr < max_nr) { pte = ptep_get(ptep); if (any_writable) writable = !!pte_write(pte); @@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (!pte_same(pte, expected_pte)) break; - /* - * Stop immediately once we reached the end of the folio. In - * corner cases the next PFN might fall into a different - * folio. 
- */ - if (pte_pfn(pte) >= folio_end_pfn) - break; - if (any_writable) *any_writable |= writable; if (any_young) @@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr, if (any_dirty) *any_dirty |= dirty; - nr = pte_batch_hint(ptep, pte); - expected_pte = pte_advance_pfn(expected_pte, nr); - ptep += nr; + cur_nr = pte_batch_hint(ptep, pte); + expected_pte = pte_advance_pfn(expected_pte, cur_nr); + ptep += cur_nr; + nr += cur_nr; } - return min(ptep - start_ptep, max_nr); + return min(nr, max_nr); } /** diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 3ea317837c2d37..5f922dd38ffa13 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -1567,6 +1567,7 @@ static void kasan_memcmp(struct kunit *test) static void kasan_strings(struct kunit *test) { char *ptr; + char *src; size_t size = 24; /* @@ -1578,6 +1579,25 @@ static void kasan_strings(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + src = kmalloc(KASAN_GRANULE_SIZE, GFP_KERNEL | __GFP_ZERO); + strscpy(src, "f0cacc1a0000000", KASAN_GRANULE_SIZE); + + /* + * Make sure that strscpy() does not trigger KASAN if it overreads into + * poisoned memory. + * + * The expected size does not include the terminator '\0' + * so it is (KASAN_GRANULE_SIZE - 2) == + * KASAN_GRANULE_SIZE - ("initial removed character" + "\0"). + */ + KUNIT_EXPECT_EQ(test, KASAN_GRANULE_SIZE - 2, + strscpy(ptr, src + 1, KASAN_GRANULE_SIZE)); + + /* strscpy should fail if the first byte is unreadable. 
*/ + KUNIT_EXPECT_KASAN_FAIL(test, strscpy(ptr, src + KASAN_GRANULE_SIZE, + KASAN_GRANULE_SIZE)); + + kfree(src); kfree(ptr); /* @@ -2127,4 +2147,5 @@ static struct kunit_suite kasan_kunit_test_suite = { kunit_test_suite(kasan_kunit_test_suite); +MODULE_DESCRIPTION("KUnit tests for checking KASAN bug-detection capabilities"); MODULE_LICENSE("GPL"); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 88d1c9dcb50721..d2c70cd2afb1de 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_vm_area_shadow(void *start, { } +struct vmalloc_populate_data { + unsigned long start; + struct page **pages; +}; + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, - void *unused) + void *_data) { - unsigned long page; + struct vmalloc_populate_data *data = _data; + struct page *page; pte_t pte; + int index; if (likely(!pte_none(ptep_get(ptep)))) return 0; - page = __get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); - pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); + index = PFN_DOWN(addr - data->start); + page = data->pages[index]; + __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); + pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); if (likely(pte_none(ptep_get(ptep)))) { set_pte_at(&init_mm, addr, ptep, pte); - page = 0; + data->pages[index] = NULL; } spin_unlock(&init_mm.page_table_lock); - if (page) - free_page(page); + + return 0; +} + +static void ___free_pages_bulk(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) { + if (pages[i]) { + __free_pages(pages[i], 0); + pages[i] = NULL; + } + } +} + +static int ___alloc_pages_bulk(struct page **pages, int nr_pages) +{ + unsigned long nr_populated, nr_total = nr_pages; + struct page **page_array = pages; + + while (nr_pages) { + nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + if 
(!nr_populated) { + ___free_pages_bulk(page_array, nr_total - nr_pages); + return -ENOMEM; + } + pages += nr_populated; + nr_pages -= nr_populated; + } + return 0; } +static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) +{ + unsigned long nr_pages, nr_total = PFN_UP(end - start); + struct vmalloc_populate_data data; + int ret = 0; + + data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!data.pages) + return -ENOMEM; + + while (nr_total) { + nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); + ret = ___alloc_pages_bulk(data.pages, nr_pages); + if (ret) + break; + + data.start = start; + ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, + kasan_populate_vmalloc_pte, &data); + ___free_pages_bulk(data.pages, nr_pages); + if (ret) + break; + + start += nr_pages * PAGE_SIZE; + nr_total -= nr_pages; + } + + free_page((unsigned long)data.pages); + + return ret; +} + int kasan_populate_vmalloc(unsigned long addr, unsigned long size) { unsigned long shadow_start, shadow_end; @@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) shadow_start = PAGE_ALIGN_DOWN(shadow_start); shadow_end = PAGE_ALIGN(shadow_end); - ret = apply_to_page_range(&init_mm, shadow_start, - shadow_end - shadow_start, - kasan_populate_vmalloc_pte, NULL); + ret = __kasan_populate_vmalloc(shadow_start, shadow_end); if (ret) return ret; diff --git a/mm/memblock.c b/mm/memblock.c index 0a53db4d9f7beb..0e9ebb8aa7fe54 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -457,7 +457,14 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, min(new_area_start, memblock.current_limit), new_alloc_size, PAGE_SIZE); - new_array = addr ? __va(addr) : NULL; + if (addr) { + /* The memory may not have been accepted, yet. 
*/ + accept_memory(addr, new_alloc_size); + + new_array = __va(addr); + } else { + new_array = NULL; + } } if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", @@ -2183,11 +2190,14 @@ static void __init memmap_init_reserved_pages(void) struct memblock_region *region; phys_addr_t start, end; int nid; + unsigned long max_reserved; /* * set nid on all reserved pages and also treat struct * pages for the NOMAP regions as PageReserved */ +repeat: + max_reserved = memblock.reserved.max; for_each_mem_region(region) { nid = memblock_get_region_node(region); start = region->base; @@ -2196,8 +2206,15 @@ static void __init memmap_init_reserved_pages(void) if (memblock_is_nomap(region)) reserve_bootmem_region(start, end, nid); - memblock_set_node(start, end, &memblock.reserved, nid); + memblock_set_node(start, region->size, &memblock.reserved, nid); } + /* + * 'max' is changed means memblock.reserved has been doubled its + * array, which may result a new reserved region before current + * 'start'. Now we should repeat the procedure to set its node id. 
+ */ + if (max_reserved != memblock.reserved.max) + goto repeat; /* * initialize struct pages for reserved regions that don't have diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 8660908850dc81..4a9cf27a70af05 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -620,7 +620,7 @@ void memcg1_swapout(struct folio *folio, swp_entry_t entry) mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); - swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); folio_unqueue_deferred_split(folio); folio->memcg_data = 0; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 421740f1bcdc66..2d4d65f25fecd4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1168,7 +1168,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, { struct mem_cgroup *iter; int ret = 0; - int i = 0; BUG_ON(mem_cgroup_is_root(memcg)); @@ -1178,10 +1177,9 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg, css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); while (!ret && (task = css_task_iter_next(&it))) { - /* Avoid potential softlockup warning */ - if ((++i & 1023) == 0) - cond_resched(); ret = fn(task, arg); + /* Avoid potential softlockup warning */ + cond_resched(); } css_task_iter_end(&it); if (ret) { @@ -1759,7 +1757,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } struct memcg_stock_pcp { - localtry_lock_t stock_lock; + local_trylock_t stock_lock; struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; @@ -1774,7 +1772,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = { - .stock_lock = INIT_LOCALTRY_LOCK(stock_lock), + .stock_lock = INIT_LOCAL_TRYLOCK(stock_lock), }; static DEFINE_MUTEX(percpu_charge_mutex); @@ -1805,11 +1803,10 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, if (nr_pages > 
MEMCG_CHARGE_BATCH) return ret; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { - if (!gfpflags_allow_spinning(gfp_mask)) - return ret; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); - } + if (gfpflags_allow_spinning(gfp_mask)) + local_lock_irqsave(&memcg_stock.stock_lock, flags); + else if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) + return ret; stock = this_cpu_ptr(&memcg_stock); stock_pages = READ_ONCE(stock->nr_pages); @@ -1818,7 +1815,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -1857,14 +1854,14 @@ static void drain_local_stock(struct work_struct *dummy) * drain_stock races is that we always operate on local CPU stock * here with IRQ disabled */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); old = drain_obj_stock(stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -1894,7 +1891,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long flags; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { + if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) { /* * In case of unlikely failure to lock percpu stock_lock * uncharge memcg directly. 
@@ -1907,7 +1904,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) return; } __refill_stock(memcg, nr_pages); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); } /* @@ -1964,9 +1961,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) stock = &per_cpu(memcg_stock, cpu); /* drain_obj_stock requires stock_lock */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); old = drain_obj_stock(stock); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); drain_stock(stock); obj_cgroup_put(old); @@ -2787,7 +2784,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, unsigned long flags; int *bytes; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); /* @@ -2836,7 +2833,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, if (nr) __mod_objcg_mlstate(objcg, pgdat, idx, nr); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -2846,7 +2843,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { @@ -2854,7 +2851,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -2946,7 +2943,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, 
unsigned int nr_bytes, unsigned long flags; unsigned int nr_pages = 0; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ @@ -2960,7 +2957,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock->nr_bytes &= (PAGE_SIZE - 1); } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); if (nr_pages) diff --git a/mm/memory.c b/mm/memory.c index 2d8c265fc7d607..49199410805cd0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1361,7 +1361,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; struct mmu_notifier_range range; - unsigned long next, pfn; + unsigned long next, pfn = 0; bool is_cow; int ret; @@ -2938,11 +2938,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (fn) { do { if (create || !pte_none(ptep_get(pte))) { - err = fn(pte++, addr, data); + err = fn(pte, addr, data); if (err) break; } - } while (addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; @@ -3734,8 +3734,6 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio, return false; VM_WARN_ON_ONCE(folio_test_ksm(folio)); - VM_WARN_ON_ONCE(folio_mapcount(folio) > folio_nr_pages(folio)); - VM_WARN_ON_ONCE(folio_entire_mapcount(folio)); if (unlikely(folio_test_swapcache(folio))) { /* @@ -3753,13 +3751,15 @@ static bool __wp_can_reuse_large_anon_folio(struct folio *folio, /* Stabilize the mapcount vs. refcount and recheck. 
*/ folio_lock_large_mapcount(folio); - VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio)); + VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio); if (folio_test_large_maybe_mapped_shared(folio)) goto unlock; if (folio_large_mapcount(folio) != folio_ref_count(folio)) goto unlock; + VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_nr_pages(folio), folio); + VM_WARN_ON_ONCE_FOLIO(folio_entire_mapcount(folio), folio); VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != vma->vm_mm->mm_id && folio_mm_id(folio, 1) != vma->vm_mm->mm_id); diff --git a/mm/migrate.c b/mm/migrate.c index f3ee6d8d5e2eaa..676d9cfc7059ca 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -845,9 +845,11 @@ static int __buffer_migrate_folio(struct address_space *mapping, return -EAGAIN; if (check_refs) { - bool busy; + bool busy, migrating; bool invalidated = false; + migrating = test_and_set_bit_lock(BH_Migrate, &head->b_state); + VM_WARN_ON_ONCE(migrating); recheck_buffers: busy = false; spin_lock(&mapping->i_private_lock); @@ -859,12 +861,12 @@ static int __buffer_migrate_folio(struct address_space *mapping, } bh = bh->b_this_page; } while (bh != head); + spin_unlock(&mapping->i_private_lock); if (busy) { if (invalidated) { rc = -EAGAIN; goto unlock_buffers; } - spin_unlock(&mapping->i_private_lock); invalidate_bh_lrus(); invalidated = true; goto recheck_buffers; @@ -883,7 +885,7 @@ static int __buffer_migrate_folio(struct address_space *mapping, unlock_buffers: if (check_refs) - spin_unlock(&mapping->i_private_lock); + clear_bit_unlock(BH_Migrate, &head->b_state); bh = head; do { unlock_buffer(bh); diff --git a/mm/mm_init.c b/mm/mm_init.c index 84f14fa12d0ddb..8201039f1c1ccf 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1785,7 +1785,7 @@ static bool arch_has_descending_max_zone_pfns(void) return IS_ENABLED(CONFIG_ARC) && !IS_ENABLED(CONFIG_ARC_HAS_PAE40); } -static void set_high_memory(void) +static void __init set_high_memory(void) { phys_addr_t 
highmem = memblock_end_of_DRAM(); @@ -2732,6 +2732,7 @@ static void __init mem_init_print_info(void) , K(totalhigh_pages()) #endif ); + printk(KERN_INFO "le9 Unofficial (le9uo) working set protection 1.15a by Masahito Suzuki (forked from hakavlad's original le9 patch)"); } void __init __weak arch_mm_preinit(void) diff --git a/mm/mremap.c b/mm/mremap.c index 7db9da609c84f0..0d4948b720e22e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, mremap_userfaultfd_prep(new_vma, vrm->uf); } - if (is_vm_hugetlb_page(vma)) - clear_vma_resv_huge_pages(vma); + fixup_hugetlb_reservations(vma); /* Tell pfnmap has moved from this vma */ if (unlikely(vma->vm_flags & VM_PFNMAP)) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c81624bc396971..f19314209557b6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -71,7 +71,11 @@ static long ratelimit_pages = 32; /* * Start background writeback (via writeback threads) at this percentage */ +#ifdef CONFIG_CACHY +static int dirty_background_ratio = 5; +#else static int dirty_background_ratio = 10; +#endif /* * dirty_background_bytes starts at 0 (disabled) so that it is a function of @@ -99,7 +103,11 @@ static unsigned long vm_dirty_bytes; /* * The interval between `kupdate'-style writebacks */ +#ifdef CONFIG_CACHY +unsigned int dirty_writeback_interval = 10 * 100; /* centiseconds */ +#else unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */ +#endif EXPORT_SYMBOL_GPL(dirty_writeback_interval); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd6b865cb1abfb..4632022638a5ae 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -274,7 +274,11 @@ const char * const migratetype_names[MIGRATE_TYPES] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_CACHY +static int watermark_boost_factor __read_mostly; +#else static int watermark_boost_factor __read_mostly = 15000; +#endif static int watermark_scale_factor 
= 10; int defrag_mode; @@ -290,7 +294,8 @@ EXPORT_SYMBOL(nr_online_nodes); #endif static bool page_contains_unaccepted(struct page *page, unsigned int order); -static bool cond_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags); static bool __free_unaccepted(struct page *page); int page_group_by_mobility_disabled __read_mostly; @@ -897,9 +902,7 @@ static inline bool page_expected_state(struct page *page, #ifdef CONFIG_MEMCG page->memcg_data | #endif -#ifdef CONFIG_PAGE_POOL - ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) | -#endif + page_pool_page_is_pp(page) | (page->flags & check_flags))) return false; @@ -926,10 +929,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags) if (unlikely(page->memcg_data)) bad_reason = "page still charged to cgroup"; #endif -#ifdef CONFIG_PAGE_POOL - if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE)) + if (unlikely(page_pool_page_is_pp(page))) bad_reason = "page_pool leak"; -#endif return bad_reason; } @@ -1151,14 +1152,9 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) __pgalloc_tag_sub(page, nr); } -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) +/* When tag is not NULL, assuming mem_alloc_profiling_enabled */ +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) { - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - tag = __pgalloc_tag_get(page); if (tag) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } @@ -1168,7 +1164,7 @@ static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} +static inline void pgalloc_tag_sub_pages(struct 
alloc_tag *tag, unsigned int nr) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ @@ -1400,11 +1396,12 @@ static void free_one_page(struct zone *zone, struct page *page, struct llist_head *llhead; unsigned long flags; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) { add_page_to_zone_llist(zone, page, order); return; } + } else { spin_lock_irqsave(&zone->lock, flags); } @@ -2182,23 +2179,15 @@ try_to_claim_block(struct zone *zone, struct page *page, } /* - * Try finding a free buddy page on the fallback list. - * - * This will attempt to claim a whole pageblock for the requested type - * to ensure grouping of such requests in the future. - * - * If a whole block cannot be claimed, steal an individual page, regressing to - * __rmqueue_smallest() logic to at least break up as little contiguity as - * possible. + * Try to allocate from some fallback migratetype by claiming the entire block, + * i.e. converting it to the allocation's start migratetype. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. - * - * Return the stolen page, or NULL if none can be found. 
*/ static __always_inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +__rmqueue_claim(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { struct free_area *area; @@ -2236,14 +2225,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = try_to_claim_block(zone, page, current_order, order, start_migratetype, fallback_mt, alloc_flags); - if (page) - goto got_one; + if (page) { + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } } - if (alloc_flags & ALLOC_NOFRAGMENT) - return NULL; + return NULL; +} + +/* + * Try to steal a single page from some fallback migratetype. Leave the rest of + * the block as its current migratetype, potentially causing fragmentation. + */ +static __always_inline struct page * +__rmqueue_steal(struct zone *zone, int order, int start_migratetype) +{ + struct free_area *area; + int current_order; + struct page *page; + int fallback_mt; + bool claim_block; - /* No luck claiming pageblock. Find the smallest fallback page */ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, @@ -2253,25 +2257,28 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = get_page_from_free_area(area, fallback_mt); page_del_and_expand(zone, page, order, current_order, fallback_mt); - goto got_one; + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; } return NULL; - -got_one: - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, fallback_mt); - - return page; } +enum rmqueue_mode { + RMQUEUE_NORMAL, + RMQUEUE_CMA, + RMQUEUE_CLAIM, + RMQUEUE_STEAL, +}; + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. 
*/ static __always_inline struct page * __rmqueue(struct zone *zone, unsigned int order, int migratetype, - unsigned int alloc_flags) + unsigned int alloc_flags, enum rmqueue_mode *mode) { struct page *page; @@ -2290,16 +2297,48 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, } } - page = __rmqueue_smallest(zone, order, migratetype); - if (unlikely(!page)) { - if (alloc_flags & ALLOC_CMA) + /* + * First try the freelists of the requested migratetype, then try + * fallback modes with increasing levels of fragmentation risk. + * + * The fallback logic is expensive and rmqueue_bulk() calls in + * a loop with the zone->lock held, meaning the freelists are + * not subject to any outside changes. Remember in *mode where + * we found pay dirt, to save us the search on the next call. + */ + switch (*mode) { + case RMQUEUE_NORMAL: + page = __rmqueue_smallest(zone, order, migratetype); + if (page) + return page; + fallthrough; + case RMQUEUE_CMA: + if (alloc_flags & ALLOC_CMA) { page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype, - alloc_flags); + if (page) { + *mode = RMQUEUE_CMA; + return page; + } + } + fallthrough; + case RMQUEUE_CLAIM: + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); + if (page) { + /* Replenished preferred freelist, back to normal mode. 
*/ + *mode = RMQUEUE_NORMAL; + return page; + } + fallthrough; + case RMQUEUE_STEAL: + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { + page = __rmqueue_steal(zone, order, migratetype); + if (page) { + *mode = RMQUEUE_STEAL; + return page; + } + } } - return page; + return NULL; } /* @@ -2311,17 +2350,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; unsigned long flags; int i; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return 0; + } else { spin_lock_irqsave(&zone->lock, flags); } for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, - alloc_flags); + alloc_flags, &rmqm); if (unlikely(page == NULL)) break; @@ -2937,15 +2978,18 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, do { page = NULL; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return NULL; + } else { spin_lock_irqsave(&zone->lock, flags); } if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { - page = __rmqueue(zone, order, migratetype, alloc_flags); + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); /* * If the allocation fails, allow OOM handling and @@ -3422,18 +3466,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, return false; } -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx) -{ - long free_pages = zone_page_state(z, NR_FREE_PAGES); - - if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) - 
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); - - return __zone_watermark_ok(z, order, mark, highest_zoneidx, 0, - free_pages); -} - #ifdef CONFIG_NUMA int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; @@ -3580,7 +3612,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, } } - cond_accept_memory(zone, order); + cond_accept_memory(zone, order, alloc_flags); /* * Detect whether the number of free pages is below high @@ -3607,7 +3639,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, gfp_mask)) { int ret; - if (cond_accept_memory(zone, order)) + if (cond_accept_memory(zone, order, alloc_flags)) goto try_this_zone; /* @@ -3660,7 +3692,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, return page; } else { - if (cond_accept_memory(zone, order)) + if (cond_accept_memory(zone, order, alloc_flags)) goto try_this_zone; /* Try again if zone has deferred pages */ @@ -4530,6 +4562,14 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, } retry: + /* + * Deal with possible cpuset update races or zonelist updates to avoid + * infinite retries. 
+ */ + if (check_retry_cpuset(cpuset_mems_cookie, ac) || + check_retry_zonelist(zonelist_iter_cookie)) + goto restart; + /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ if (alloc_flags & ALLOC_KSWAPD) wake_all_kswapds(order, gfp_mask, ac); @@ -4813,7 +4853,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, goto failed; } - cond_accept_memory(zone, 0); + cond_accept_memory(zone, 0, alloc_flags); retry_this_zone: mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages; if (zone_watermark_fast(zone, 0, mark, @@ -4822,7 +4862,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, break; } - if (cond_accept_memory(zone, 0)) + if (cond_accept_memory(zone, 0, alloc_flags)) goto retry_this_zone; /* Try again if zone has deferred pages */ @@ -5029,11 +5069,13 @@ static void ___free_pages(struct page *page, unsigned int order, { /* get PageHead before we drop reference */ int head = PageHead(page); + /* get alloc tag in case the page is released by others */ + struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) __free_frozen_pages(page, order, fpi_flags); else if (!head) { - pgalloc_tag_sub_pages(page, (1 << order) - 1); + pgalloc_tag_sub_pages(tag, (1 << order) - 1); while (order-- > 0) __free_frozen_pages(page + (1 << order), order, fpi_flags); @@ -7138,9 +7180,6 @@ bool has_managed_dma(void) #ifdef CONFIG_UNACCEPTED_MEMORY -/* Counts number of zones with unaccepted pages. 
*/ -static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); - static bool lazy_accept = true; static int __init accept_memory_parse(char *p) @@ -7167,11 +7206,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) static void __accept_page(struct zone *zone, unsigned long *flags, struct page *page) { - bool last; - list_del(&page->lru); - last = list_empty(&zone->unaccepted_pages); - account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); __ClearPageUnaccepted(page); @@ -7180,9 +7215,6 @@ static void __accept_page(struct zone *zone, unsigned long *flags, accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER); __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); - - if (last) - static_branch_dec(&zones_with_unaccepted_pages); } void accept_page(struct page *page) @@ -7219,20 +7251,17 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static inline bool has_unaccepted_memory(void) -{ - return static_branch_unlikely(&zones_with_unaccepted_pages); -} - -static bool cond_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags) { long to_accept, wmark; bool ret = false; - if (!has_unaccepted_memory()) + if (list_empty(&zone->unaccepted_pages)) return false; - if (list_empty(&zone->unaccepted_pages)) + /* Bailout, since try_to_accept_memory_one() needs to take a lock */ + if (alloc_flags & ALLOC_TRYLOCK) return false; wmark = promo_wmark_pages(zone); @@ -7265,22 +7294,17 @@ static bool __free_unaccepted(struct page *page) { struct zone *zone = page_zone(page); unsigned long flags; - bool first = false; if (!lazy_accept) return false; spin_lock_irqsave(&zone->lock, flags); - first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); 
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); __SetPageUnaccepted(page); spin_unlock_irqrestore(&zone->lock, flags); - if (first) - static_branch_inc(&zones_with_unaccepted_pages); - return true; } @@ -7291,7 +7315,8 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order) return false; } -static bool cond_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order, + int alloc_flags) { return false; } @@ -7362,11 +7387,6 @@ struct page *try_alloc_pages_noprof(int nid, unsigned int order) if (!pcp_allowed_order(order)) return NULL; -#ifdef CONFIG_UNACCEPTED_MEMORY - /* Bailout, since try_to_accept_memory_one() needs to take a lock */ - if (has_unaccepted_memory()) - return NULL; -#endif /* Bailout, since _deferred_grow_zone() needs to take a lock */ if (deferred_pages_enabled()) return NULL; diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71d4..be8b09e09d3043 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,6 +1973,11 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) +static inline void init_slab_obj_exts(struct slab *slab) +{ + slab->obj_exts = 0; +} + int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2023,8 +2028,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, return 0; } -/* Should be called only if mem_alloc_profiling_enabled() */ -static noinline void free_slab_obj_exts(struct slab *slab) +static inline void free_slab_obj_exts(struct slab *slab) { struct slabobj_ext *obj_exts; @@ -2044,20 +2048,12 @@ static noinline void free_slab_obj_exts(struct slab *slab) slab->obj_exts = 0; } -static inline bool need_slab_obj_ext(void) -{ - if (mem_alloc_profiling_enabled()) - return true; +#else /* CONFIG_SLAB_OBJ_EXT */ - /* - * CONFIG_MEMCG creates vector of obj_cgroup objects 
conditionally - * inside memcg_slab_post_alloc_hook. No other users for now. - */ - return false; +static inline void init_slab_obj_exts(struct slab *slab) +{ } -#else /* CONFIG_SLAB_OBJ_EXT */ - static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2068,11 +2064,6 @@ static inline void free_slab_obj_exts(struct slab *slab) { } -static inline bool need_slab_obj_ext(void) -{ - return false; -} - #endif /* CONFIG_SLAB_OBJ_EXT */ #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -2120,7 +2111,7 @@ __alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) static inline void alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags) { - if (need_slab_obj_ext()) + if (mem_alloc_profiling_enabled()) __alloc_tagging_slab_alloc_hook(s, object, flags); } @@ -2592,8 +2583,12 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online() || need_slab_obj_ext()) - free_slab_obj_exts(slab); + /* + * The slab object extensions should now be freed regardless of + * whether mem_alloc_profiling_enabled() or not because profiling + * might have been disabled after slab->obj_exts got allocated. 
+ */ + free_slab_obj_exts(slab); mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); @@ -2637,6 +2632,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->objects = oo_objects(oo); slab->inuse = 0; slab->frozen = 0; + init_slab_obj_exts(slab); account_slab(slab, oo_order(oo), s, flags); diff --git a/mm/swap.c b/mm/swap.c index 77b2d599787342..443eb4a8b42b8e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1091,6 +1091,10 @@ static const struct ctl_table swap_sysctl_table[] = { */ void __init swap_setup(void) { +#ifdef CONFIG_CACHY + /* Only swap-in pages requested, avoid readahead */ + page_cluster = 0; +#else unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ @@ -1098,6 +1102,7 @@ void __init swap_setup(void) page_cluster = 2; else page_cluster = 3; +#endif /* CONFIG_CACHY */ /* * Right now other parts of the system means that we * _really_ don't want to cluster much more diff --git a/mm/swapfile.c b/mm/swapfile.c index 2eff8b51a9455f..412ccd6543b34d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1272,13 +1272,22 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp) VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); - /* - * Should not even be attempting large allocations when huge - * page swap is disabled. Warn and fail the allocation. - */ - if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) { - VM_WARN_ON_ONCE(1); - return -EINVAL; + if (order) { + /* + * Reject large allocation when THP_SWAP is disabled, + * the caller should split the folio and try again. + */ + if (!IS_ENABLED(CONFIG_THP_SWAP)) + return -EAGAIN; + + /* + * Allocation size should never exceed cluster size + * (HPAGE_PMD_SIZE). 
+ */ + if (size > SWAPFILE_CLUSTER) { + VM_WARN_ON_ONCE(1); + return -EINVAL; + } } local_lock(&percpu_swap_cluster.lock); @@ -3322,6 +3331,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } + /* + * The swap subsystem needs a major overhaul to support this. + * It doesn't work yet so just disable it for now. + */ + if (mapping_min_folio_order(mapping) > 0) { + error = -EINVAL; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ diff --git a/mm/truncate.c b/mm/truncate.c index 5d98054094d1f3..f2aaf99f29906a 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -191,6 +191,7 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); + size_t size = folio_size(folio); unsigned int offset, length; struct page *split_at, *split_at2; @@ -198,14 +199,13 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) offset = start - pos; else offset = 0; - length = folio_size(folio); - if (pos + length <= (u64)end) - length = length - offset; + if (pos + size <= (u64)end) + length = size - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); - if (length == folio_size(folio)) { + if (length == size) { truncate_inode_folio(folio->mapping, folio); return true; } @@ -224,16 +224,20 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) return true; split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - split_at2 = folio_page(folio, - PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); - if (!try_folio_split(folio, split_at, NULL)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste * for shmem truncate */ - struct folio *folio2 = page_folio(split_at2); + struct folio *folio2; + + if (offset + length == size) + goto 
no_split; + + split_at2 = folio_page(folio, + PAGE_ALIGN_DOWN(offset + length) / PAGE_SIZE); + folio2 = page_folio(split_at2); if (!folio_try_get(folio2)) goto no_split; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index fbf2cf62ab9f53..e0db855c89b41a 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1064,8 +1064,13 @@ static int move_present_pte(struct mm_struct *mm, src_folio->index = linear_page_index(dst_vma, dst_addr); orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot); - /* Follow mremap() behavior and treat the entry dirty after the move */ - orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma); + /* Set soft dirty bit so userspace can notice the pte was moved */ +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); +#endif + if (pte_dirty(orig_src_pte)) + orig_dst_pte = pte_mkdirty(orig_dst_pte); + orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma); set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte); out: @@ -1100,6 +1105,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); +#ifdef CONFIG_MEM_SOFT_DIRTY + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); +#endif set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); @@ -1902,6 +1910,14 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, unsigned long end) { struct vm_area_struct *ret; + bool give_up_on_oom = false; + + /* + * If we are modifying only and not splitting, just give up on the merge + * if OOM prevents us from merging successfully. 
+ */ + if (start == vma->vm_start && end == vma->vm_end) + give_up_on_oom = true; /* Reset ptes for the whole vma range if wr-protected */ if (userfaultfd_wp(vma)) @@ -1909,7 +1925,7 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, ret = vma_modify_flags_uffd(vmi, prev, vma, start, end, vma->vm_flags & ~__VM_UFFD_FLAGS, - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, give_up_on_oom); /* * In the vma_merge() successful mprotect-like case 8: @@ -1960,7 +1976,8 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, new_flags, - (struct vm_userfaultfd_ctx){ctx}); + (struct vm_userfaultfd_ctx){ctx}, + /* give_up_on_oom = */false); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/mm/util.c b/mm/util.c index 448117da071f69..b9334775c51d43 100644 --- a/mm/util.c +++ b/mm/util.c @@ -857,6 +857,40 @@ static const struct ctl_table util_sysctl_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, + { + .procname = "workingset_protection", + .data = &sysctl_workingset_protection, + .maxlen = sizeof(bool), + .mode = 0644, + .proc_handler = &proc_dobool, + }, + { + .procname = "anon_min_ratio", + .data = &sysctl_anon_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_low_ratio", + .data = &sysctl_clean_low_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, + { + .procname = "clean_min_ratio", + .data = &sysctl_clean_min_ratio, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = &vm_workingset_protection_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, }; static int __init init_vm_util_sysctls(void) diff --git a/mm/vma.c 
b/mm/vma.c index 5cdc5612bfc19e..a468d4c29c0cd4 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -666,6 +666,9 @@ static void vmg_adjust_set_range(struct vma_merge_struct *vmg) /* * Actually perform the VMA merge operation. * + * IMPORTANT: We guarantee that, should vmg->give_up_on_oom be set, we will not + * modify any VMAs or cause inconsistent state should an OOM condition arise. + * * Returns 0 on success, or an error value on failure. */ static int commit_merge(struct vma_merge_struct *vmg) @@ -685,6 +688,12 @@ static int commit_merge(struct vma_merge_struct *vmg) init_multi_vma_prep(&vp, vma, vmg); + /* + * If vmg->give_up_on_oom is set, we're safe, because we don't actually + * manipulate any VMAs until we succeed at preallocation. + * + * Past this point, we will not return an error. + */ if (vma_iter_prealloc(vmg->vmi, vma)) return -ENOMEM; @@ -915,7 +924,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (anon_dup) unlink_anon_vmas(anon_dup); - vmg->state = VMA_MERGE_ERROR_NOMEM; + /* + * We've cleaned up any cloned anon_vma's, no VMAs have been + * modified, no harm no foul if the user requests that we not + * report this and just give up, leaving the VMAs unmerged. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -926,7 +941,15 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( abort: vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); - vmg->state = VMA_MERGE_ERROR_NOMEM; + + /* + * This means we have failed to clone anon_vma's correctly, but no + * actual changes to VMAs have occurred, so no harm no foul - if the + * user doesn't want this reported and instead just wants to give up on + * the merge, allow it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -1068,6 +1091,10 @@ int vma_expand(struct vma_merge_struct *vmg) /* This should already have been checked by this point. 
*/ VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); vma_start_write(next); + /* + * In this case we don't report OOM, so vmg->give_up_on_oom is + * safe. + */ ret = dup_anon_vma(middle, next, &anon_dup); if (ret) return ret; @@ -1090,9 +1117,15 @@ int vma_expand(struct vma_merge_struct *vmg) return 0; nomem: - vmg->state = VMA_MERGE_ERROR_NOMEM; if (anon_dup) unlink_anon_vmas(anon_dup); + /* + * If the user requests that we just give up on OOM, we are safe to do so + * here, as commit merge provides this contract to us. Nothing has been + * changed - no harm no foul, just don't report it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return -ENOMEM; } @@ -1534,6 +1567,13 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) if (vmg_nomem(vmg)) return ERR_PTR(-ENOMEM); + /* + * Split can fail for reasons other than OOM, so if the user requests + * this it's probably a mistake. + */ + VM_WARN_ON(vmg->give_up_on_oom && + (vma->vm_start != start || vma->vm_end != end)); + /* Split any preceding portion of the VMA. 
*/ if (vma->vm_start < start) { int err = split_vma(vmg->vmi, vma, start, 1); @@ -1602,12 +1642,15 @@ struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom) { VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); vmg.flags = new_flags; vmg.uffd_ctx = new_ctx; + if (give_up_on_oom) + vmg.give_up_on_oom = true; return vma_modify(&vmg); } @@ -1791,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_vma_link: + fixup_hugetlb_reservations(new_vma); vma_close(new_vma); if (new_vma->vm_file) diff --git a/mm/vma.h b/mm/vma.h index 7356ca5a22d332..149926e8a6d1ac 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -114,6 +114,12 @@ struct vma_merge_struct { */ bool just_expand :1; + /* + * If a merge is possible, but an OOM error occurs, give up and don't + * execute the merge, returning NULL. + */ + bool give_up_on_oom :1; + /* Internal flags set during merge process: */ /* @@ -255,7 +261,8 @@ __must_check struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx); + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom); __must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 3ed720a787ecd8..00cf1b575c8962 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm, { vm->flags = flags; vm->addr = (void *)va->va_start; - vm->size = va_size(va); + vm->size = vm->requested_size = va_size(va); vm->caller = caller; va->vm = vm; } @@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size, area->flags = flags; area->caller = caller; + area->requested_size = requested_size; va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area); if 
(IS_ERR(va)) { @@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof); */ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) { + struct vm_struct *vm = NULL; + size_t alloced_size = 0; size_t old_size = 0; void *n; @@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) } if (p) { - struct vm_struct *vm; - vm = find_vm_area(p); if (unlikely(!vm)) { WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p); return NULL; } - old_size = get_vm_area_size(vm); + alloced_size = get_vm_area_size(vm); + old_size = vm->requested_size; + if (WARN(alloced_size < old_size, + "vrealloc() has mismatched area vs requested sizes (%p)\n", p)) + return NULL; } /* @@ -4088,11 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags) * would be a good heuristic for when to shrink the vm_area? */ if (size <= old_size) { - /* Zero out spare memory. */ - if (want_init_on_alloc(flags)) + /* Zero out "freed" memory, potentially for future realloc. */ + if (want_init_on_free() || want_init_on_alloc(flags)) memset((void *)p + size, 0, old_size - size); + vm->requested_size = size; kasan_poison_vmalloc(p + size, old_size - size); - kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL); + return (void *)p; + } + + /* + * We already have the bytes available in the allocation; use them. + */ + if (size <= alloced_size) { + kasan_unpoison_vmalloc(p + old_size, size - old_size, + KASAN_VMALLOC_PROT_NORMAL); + /* + * No need to zero memory here, as unused memory will have + * already been zeroed at initial allocation time or during + * realloc shrink time. + */ + vm->requested_size = size; return (void *)p; } diff --git a/mm/vmpressure.c b/mm/vmpressure.c index bd5183dfd8791f..3a410f53a07ca4 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -43,7 +43,11 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; * essence, they are percents: the higher the value, the more number * unsuccessful reclaims there were. 
*/ +#ifdef CONFIG_CACHY +static const unsigned int vmpressure_level_med = 65; +#else static const unsigned int vmpressure_level_med = 60; +#endif static const unsigned int vmpressure_level_critical = 95; /* diff --git a/mm/vmscan.c b/mm/vmscan.c index b620d74b0f66e3..75f48ea2b33694 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -148,6 +148,15 @@ struct scan_control { /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; + /* The anonymous pages on the current node are below vm.anon_min_ratio */ + unsigned int anon_below_min:1; + + /* The clean file pages on the current node are below vm.clean_low_ratio */ + unsigned int clean_below_low:1; + + /* The clean file pages on the current node are below vm.clean_min_ratio */ + unsigned int clean_below_min:1; + /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; @@ -197,10 +206,23 @@ struct scan_control { #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif +bool sysctl_workingset_protection __read_mostly = true; +u8 sysctl_anon_min_ratio __read_mostly = CONFIG_ANON_MIN_RATIO; +u8 sysctl_clean_low_ratio __read_mostly = CONFIG_CLEAN_LOW_RATIO; +u8 sysctl_clean_min_ratio __read_mostly = CONFIG_CLEAN_MIN_RATIO; +static u64 sysctl_anon_min_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_low_ratio_kb __read_mostly = 0; +static u64 sysctl_clean_min_ratio_kb __read_mostly = 0; +static u64 workingset_protection_prev_totalram __read_mostly = 0; + /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ +#ifdef CONFIG_CACHY +int vm_swappiness = 100; +#else int vm_swappiness = 60; +#endif #ifdef CONFIG_MEMCG @@ -1147,6 +1169,10 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; + if (folio_is_file_lru(folio) ? 
sc->clean_below_min : + (sc->anon_below_min && !sc->clean_below_min)) + goto keep_locked; + /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing @@ -2521,6 +2547,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } + /* + * Force-scan anon if clean file pages is under vm.clean_low_ratio + * or vm.clean_min_ratio. + */ + if (sc->clean_below_low || sc->clean_below_min) { + scan_balance = SCAN_ANON; + goto out; + } + /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. @@ -2638,6 +2673,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } + /* + * Hard protection of the working set. + * Don't reclaim anon/file pages when the amount is + * below the watermark of the same type. + */ + if (file ? sc->clean_below_min : sc->anon_below_min) + scan = 0; + nr[lru] = scan; } } @@ -2657,6 +2700,96 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, return can_demote(pgdat->node_id, sc); } +int vm_workingset_protection_update_handler(const struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + workingset_protection_prev_totalram = 0; + + return 0; +} + +static void prepare_workingset_protection(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long node_mem_total; + struct sysinfo i; + + if (!(sysctl_workingset_protection)) { + sc->anon_below_min = 0; + sc->clean_below_low = 0; + sc->clean_below_min = 0; + return; + } + + if (likely(sysctl_anon_min_ratio || + sysctl_clean_low_ratio || + sysctl_clean_min_ratio)) { +#ifdef CONFIG_NUMA + si_meminfo_node(&i, pgdat->node_id); +#else //CONFIG_NUMA + si_meminfo(&i); +#endif //CONFIG_NUMA + node_mem_total = i.totalram; + + if (unlikely(workingset_protection_prev_totalram != 
node_mem_total)) { + sysctl_anon_min_ratio_kb = + node_mem_total * sysctl_anon_min_ratio / 100; + sysctl_clean_low_ratio_kb = + node_mem_total * sysctl_clean_low_ratio / 100; + sysctl_clean_min_ratio_kb = + node_mem_total * sysctl_clean_min_ratio / 100; + workingset_protection_prev_totalram = node_mem_total; + } + } + + /* + * Check the number of anonymous pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_anon_min_ratio) { + unsigned long reclaimable_anon; + + reclaimable_anon = + node_page_state(pgdat, NR_ACTIVE_ANON) + + node_page_state(pgdat, NR_INACTIVE_ANON) + + node_page_state(pgdat, NR_ISOLATED_ANON); + + sc->anon_below_min = reclaimable_anon < sysctl_anon_min_ratio_kb; + } else + sc->anon_below_min = 0; + + /* + * Check the number of clean file pages to protect them from + * reclaiming if their amount is below the specified. + */ + if (sysctl_clean_low_ratio || sysctl_clean_min_ratio) { + unsigned long reclaimable_file, dirty, clean; + + reclaimable_file = + node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE) + + node_page_state(pgdat, NR_ISOLATED_FILE); + dirty = node_page_state(pgdat, NR_FILE_DIRTY); + /* + * node_page_state() sum can go out of sync since + * all the values are not read at once. 
+ */ + if (likely(reclaimable_file > dirty)) + clean = reclaimable_file - dirty; + else + clean = 0; + + sc->clean_below_low = clean < sysctl_clean_low_ratio_kb; + sc->clean_below_min = clean < sysctl_clean_min_ratio_kb; + } else { + sc->clean_below_low = 0; + sc->clean_below_min = 0; + } +} + #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED @@ -4623,11 +4756,21 @@ static int get_tier_idx(struct lruvec *lruvec, int type) return tier - 1; } -static int get_type_to_scan(struct lruvec *lruvec, int swappiness) +static int get_type_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { struct ctrl_pos sp, pv; - if (swappiness <= MIN_SWAPPINESS + 1) + if (swappiness == MIN_SWAPPINESS) + return LRU_GEN_FILE; + + if (sc->clean_below_min) + return LRU_GEN_ANON; + if (sc->anon_below_min) + return LRU_GEN_FILE; + if (sc->clean_below_low) + return LRU_GEN_ANON; + + if (swappiness == MIN_SWAPPINESS + 1) return LRU_GEN_FILE; if (swappiness >= MAX_SWAPPINESS) @@ -4646,7 +4789,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw int *type_scanned, struct list_head *list) { int i; - int type = get_type_to_scan(lruvec, swappiness); + int type = get_type_to_scan(lruvec, sc, swappiness); for_each_evictable_type(i, swappiness) { int scanned; @@ -4889,6 +5032,12 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); + prepare_workingset_protection(pgdat, sc); + + if (sysctl_workingset_protection && sc->clean_below_min && + !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) + return 0; + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -6027,6 +6176,8 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) prepare_scan_control(pgdat, sc); + prepare_workingset_protection(pgdat, sc); + shrink_node_memcgs(pgdat, sc); 
flush_reclaim_state(sc); @@ -6736,6 +6887,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * meet watermarks. */ for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { + enum zone_stat_item item; unsigned long free_pages; if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) @@ -6746,11 +6898,33 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) /* * In defrag_mode, watermarks must be met in whole * blocks to avoid polluting allocator fallbacks. + * + * However, kswapd usually cannot accomplish this on + * its own and needs kcompactd support. Once it's + * reclaimed a compaction gap, and kswapd_shrink_node + * has dropped order, simply ensure there are enough + * base pages for compaction, wake kcompactd & sleep. */ - if (defrag_mode) - free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS); + if (defrag_mode && order) + item = NR_FREE_PAGES_BLOCKS; else - free_pages = zone_page_state(zone, NR_FREE_PAGES); + item = NR_FREE_PAGES; + + /* + * When there is a high number of CPUs in the system, + * the cumulative error from the vmstat per-cpu cache + * can blur the line between the watermarks. In that + * case, be safe and get an accurate snapshot. + * + * TODO: NR_FREE_PAGES_BLOCKS moves in steps of + * pageblock_nr_pages, while the vmstat pcp threshold + * is limited to 125. On many configurations that + * counter won't actually be per-cpu cached. But keep + * things simple for now; revisit when somebody cares. 
+ */ + free_pages = zone_page_state(zone, item); + if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark) + free_pages = zone_page_state_snapshot(zone, item); if (__zone_watermark_ok(zone, order, mark, highest_zoneidx, 0, free_pages)) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 961b270f023c23..d14a7e317ac8bf 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1243,19 +1243,19 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle, class = zspage_class(pool, zspage); off = offset_in_page(class->size * obj_idx); - if (off + class->size <= PAGE_SIZE) { + if (!ZsHugePage(zspage)) + off += ZS_HANDLE_SIZE; + + if (off + mem_len <= PAGE_SIZE) { /* this object is contained entirely within a page */ void *dst = kmap_local_zpdesc(zpdesc); - if (!ZsHugePage(zspage)) - off += ZS_HANDLE_SIZE; memcpy(dst + off, handle_mem, mem_len); kunmap_local(dst); } else { /* this object spans two pages */ size_t sizes[2]; - off += ZS_HANDLE_SIZE; sizes[0] = PAGE_SIZE - off; sizes[1] = mem_len - sizes[0]; diff --git a/net/9p/client.c b/net/9p/client.c index 61461b9fa13431..5c1ca57ccd2853 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1704,7 +1704,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) start, len, &subreq->io_iter); } if (IS_ERR(req)) { - netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); + netfs_write_subrequest_terminated(subreq, PTR_ERR(req)); return; } @@ -1712,7 +1712,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, err, false); + netfs_write_subrequest_terminated(subreq, err); return; } @@ -1724,7 +1724,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, written, false); + netfs_write_subrequest_terminated(subreq, written); } 
EXPORT_SYMBOL(p9_client_write_subreq); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 860a0786bc1e40..20b316207f9aa6 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -9,7 +9,7 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" - select LIBCRC32C + select CRC32 help B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is a routing protocol for multi-hop ad-hoc mesh networks. The diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f145f966265310..558d39dffc2330 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -506,28 +506,32 @@ batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface) return false; } -static void batadv_check_known_mac_addr(const struct net_device *net_dev) +static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface) { - const struct batadv_hard_iface *hard_iface; + const struct net_device *mesh_iface = hard_iface->mesh_iface; + const struct batadv_hard_iface *tmp_hard_iface; - rcu_read_lock(); - list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if (hard_iface->if_status != BATADV_IF_ACTIVE && - hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) + if (!mesh_iface) + return; + + list_for_each_entry(tmp_hard_iface, &batadv_hardif_list, list) { + if (tmp_hard_iface == hard_iface) + continue; + + if (tmp_hard_iface->mesh_iface != mesh_iface) continue; - if (hard_iface->net_dev == net_dev) + if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE) continue; - if (!batadv_compare_eth(hard_iface->net_dev->dev_addr, - net_dev->dev_addr)) + if (!batadv_compare_eth(tmp_hard_iface->net_dev->dev_addr, + hard_iface->net_dev->dev_addr)) continue; pr_warn("The newly added mac address (%pM) already exists on: %s\n", - net_dev->dev_addr, hard_iface->net_dev->name); + hard_iface->net_dev->dev_addr, tmp_hard_iface->net_dev->name); pr_warn("It is strongly recommended to keep mac 
addresses unique to avoid problems!\n"); } - rcu_read_unlock(); } /** @@ -725,7 +729,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); - dev_hold(mesh_iface); netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); hard_iface->mesh_iface = mesh_iface; bat_priv = netdev_priv(hard_iface->mesh_iface); @@ -764,6 +767,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->net_dev->name, hardif_mtu, required_mtu); + batadv_check_known_mac_addr(hard_iface); + if (batadv_hardif_is_iface_up(hard_iface)) batadv_hardif_activate_interface(hard_iface); else @@ -902,7 +907,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_v_hardif_init(hard_iface); - batadv_check_known_mac_addr(hard_iface->net_dev); kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); batadv_hardif_generation++; @@ -989,7 +993,7 @@ static int batadv_hard_if_event(struct notifier_block *this, if (hard_iface->if_status == BATADV_IF_NOT_IN_USE) goto hardif_put; - batadv_check_known_mac_addr(hard_iface->net_dev); + batadv_check_known_mac_addr(hard_iface); bat_priv = netdev_priv(hard_iface->mesh_iface); bat_priv->algo_ops->iface.update_mac(hard_iface); diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a347..3f72111ba651f9 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size) { struct adv_info *adv = NULL; u8 ad_len = 0, flags = 0; @@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) /* If flags would still be empty, then there is no need to * include the "Flags" AD field". 
*/ - if (flags) { + if (flags && (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_FLAGS; ptr[2] = flags; @@ -316,7 +316,8 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { + if (adv_tx_power != HCI_TX_POWER_INVALID && + (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; ptr[2] = (u8)adv_tx_power; @@ -366,17 +367,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) { - while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + size_t dlen; + + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) { u16 value = get_unaligned_le16(eir); if (uuid == value) { if (len) - *len -= 2; + *len = dlen - 2; return &eir[2]; } - eir += *len; - eir_len -= *len; + eir += dlen; + eir_len -= dlen; } return NULL; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 5c89a05e8b2905..9372db83f912fa 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -9,7 +9,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 95972fd4c78434..fccdb864af7264 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -785,7 +785,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c d->sync_handle = conn->sync_handle; if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { - hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + hci_conn_hash_list_flag(hdev, find_bis, BIS_LINK, HCI_CONN_PA_SYNC, 
d); if (!d->count) @@ -795,7 +795,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c } if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) { - hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK, + hci_conn_hash_list_flag(hdev, find_bis, BIS_LINK, HCI_CONN_BIG_SYNC, d); if (!d->count) @@ -885,9 +885,11 @@ static void cis_cleanup(struct hci_conn *conn) /* Check if ISO connection is a CIS and remove CIG if there are * no other connections using it. */ - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d); - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d); - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_BOUND, &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECT, + &d); + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECTED, + &d); if (d.count) return; @@ -910,7 +912,8 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t if (!hdev->acl_mtu) return ERR_PTR(-ECONNREFUSED); break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if (hdev->iso_mtu) /* Dedicated ISO Buffer exists */ break; @@ -974,7 +977,8 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t hci_copy_identity_address(hdev, &conn->src, &conn->src_type); conn->mtu = hdev->le_mtu ? 
hdev->le_mtu : hdev->acl_mtu; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); @@ -1071,7 +1075,8 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason) if (HCI_CONN_HANDLE_UNSET(conn->handle)) hci_conn_failed(conn, reason); break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if ((conn->state != BT_CONNECTED && !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) || test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) @@ -1146,7 +1151,8 @@ void hci_conn_del(struct hci_conn *conn) hdev->acl_cnt += conn->sent; } else { /* Unacked ISO frames */ - if (conn->type == ISO_LINK) { + if (conn->type == CIS_LINK || + conn->type == BIS_LINK) { if (hdev->iso_pkts) hdev->iso_cnt += conn->sent; else if (hdev->le_pkts) @@ -1532,7 +1538,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, memcmp(conn->le_per_adv_data, base, base_len))) return ERR_PTR(-EADDRINUSE); - conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + conn = hci_conn_add_unset(hdev, BIS_LINK, dst, HCI_ROLE_MASTER); if (IS_ERR(conn)) return conn; @@ -1740,7 +1746,7 @@ static int hci_le_create_big(struct hci_conn *conn, struct bt_iso_qos *qos) data.count = 0; /* Create a BIS for each bound connection */ - hci_conn_hash_list_state(hdev, bis_list, ISO_LINK, + hci_conn_hash_list_state(hdev, bis_list, BIS_LINK, BT_BOUND, &data); cp.handle = qos->bcast.big; @@ -1829,12 +1835,12 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) for (data.cig = 0x00; data.cig < 0xf0; data.cig++) { data.count = 0; - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECT, &data); if (data.count) continue; - hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, CIS_LINK, BT_CONNECTED, &data); if (!data.count) break; @@ -1884,7 +1890,8 @@ 
struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, cis = hci_conn_hash_lookup_cis(hdev, dst, dst_type, qos->ucast.cig, qos->ucast.cis); if (!cis) { - cis = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_MASTER); + cis = hci_conn_add_unset(hdev, CIS_LINK, dst, + HCI_ROLE_MASTER); if (IS_ERR(cis)) return cis; cis->cleanup = cis_cleanup; @@ -1976,7 +1983,7 @@ bool hci_iso_setup_path(struct hci_conn *conn) int hci_conn_check_create_cis(struct hci_conn *conn) { - if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY)) + if (conn->type != CIS_LINK) return -EINVAL; if (!conn->parent || conn->parent->state != BT_CONNECTED || @@ -2064,102 +2071,15 @@ static int create_big_sync(struct hci_dev *hdev, void *data) return hci_le_create_big(conn, &conn->iso_qos); } -static void create_pa_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create PA: %d", err); -} - -static bool hci_conn_check_create_pa_sync(struct hci_conn *conn) -{ - if (conn->type != ISO_LINK || conn->sid == HCI_SID_INVALID) - return false; - - return true; -} - -static int create_pa_sync(struct hci_dev *hdev, void *data) -{ - struct hci_cp_le_pa_create_sync cp = {0}; - struct hci_conn *conn; - int err = 0; - - hci_dev_lock(hdev); - - rcu_read_lock(); - - /* The spec allows only one pending LE Periodic Advertising Create - * Sync command at a time. If the command is pending now, don't do - * anything. We check for pending connections after each PA Sync - * Established event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2493: - * - * If the Host issues this command when another HCI_LE_Periodic_ - * Advertising_Create_Sync command is pending, the Controller shall - * return the error code Command Disallowed (0x0C). 
- */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_pa_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - cp.options = qos->bcast.options; - cp.sid = conn->sid; - cp.addr_type = conn->dst_type; - bacpy(&cp.addr, &conn->dst); - cp.skip = cpu_to_le16(qos->bcast.skip); - cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); - cp.sync_cte_type = qos->bcast.sync_cte_type; - - break; - } - } - -unlock: - rcu_read_unlock(); - - hci_dev_unlock(hdev); - - if (bacmp(&cp.addr, BDADDR_ANY)) { - hci_dev_set_flag(hdev, HCI_PA_SYNC); - set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - - err = __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC, - sizeof(cp), &cp, HCI_CMD_TIMEOUT); - if (!err) - err = hci_update_passive_scan_sync(hdev); - - if (err) { - hci_dev_clear_flag(hdev, HCI_PA_SYNC); - clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); - } - } - - return err; -} - -int hci_pa_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue start pa_create_sync and scan */ - return hci_cmd_sync_queue(hdev, create_pa_sync, - NULL, create_pa_complete); -} - struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos) { struct hci_conn *conn; - conn = hci_conn_add_unset(hdev, ISO_LINK, dst, HCI_ROLE_SLAVE); + bt_dev_dbg(hdev, "dst %pMR type %d sid %d", dst, dst_type, sid); + + conn = hci_conn_add_unset(hdev, BIS_LINK, dst, HCI_ROLE_SLAVE); if (IS_ERR(conn)) return conn; @@ -2167,97 +2087,18 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, conn->dst_type = dst_type; conn->sid = sid; conn->state = BT_LISTEN; + conn->conn_timeout = msecs_to_jiffies(qos->bcast.sync_timeout * 10); hci_conn_hold(conn); - hci_pa_create_sync_pending(hdev); + hci_connect_pa_sync(hdev, conn); return conn; } -static bool 
hci_conn_check_create_big_sync(struct hci_conn *conn) -{ - if (!conn->num_bis) - return false; - - return true; -} - -static void big_create_sync_complete(struct hci_dev *hdev, void *data, int err) -{ - bt_dev_dbg(hdev, ""); - - if (err) - bt_dev_err(hdev, "Unable to create BIG sync: %d", err); -} - -static int big_create_sync(struct hci_dev *hdev, void *data) -{ - DEFINE_FLEX(struct hci_cp_le_big_create_sync, pdu, bis, num_bis, 0x11); - struct hci_conn *conn; - - rcu_read_lock(); - - pdu->num_bis = 0; - - /* The spec allows only one pending LE BIG Create Sync command at - * a time. If the command is pending now, don't do anything. We - * check for pending connections after each BIG Sync Established - * event. - * - * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E - * page 2586: - * - * If the Host sends this command when the Controller is in the - * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ - * Established event has not been generated, the Controller shall - * return the error code Command Disallowed (0x0C). 
- */ - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (test_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags)) - goto unlock; - } - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (hci_conn_check_create_big_sync(conn)) { - struct bt_iso_qos *qos = &conn->iso_qos; - - set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); - - pdu->handle = qos->bcast.big; - pdu->sync_handle = cpu_to_le16(conn->sync_handle); - pdu->encryption = qos->bcast.encryption; - memcpy(pdu->bcode, qos->bcast.bcode, - sizeof(pdu->bcode)); - pdu->mse = qos->bcast.mse; - pdu->timeout = cpu_to_le16(qos->bcast.timeout); - pdu->num_bis = conn->num_bis; - memcpy(pdu->bis, conn->bis, conn->num_bis); - - break; - } - } - -unlock: - rcu_read_unlock(); - - if (!pdu->num_bis) - return 0; - - return hci_send_cmd(hdev, HCI_OP_LE_BIG_CREATE_SYNC, - struct_size(pdu, bis, pdu->num_bis), pdu); -} - -int hci_le_big_create_sync_pending(struct hci_dev *hdev) -{ - /* Queue big_create_sync */ - return hci_cmd_sync_queue_once(hdev, big_create_sync, - NULL, big_create_sync_complete); -} - -int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, - struct bt_iso_qos *qos, - __u16 sync_handle, __u8 num_bis, __u8 bis[]) +int hci_conn_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, + struct bt_iso_qos *qos, __u16 sync_handle, + __u8 num_bis, __u8 bis[]) { int err; @@ -2274,9 +2115,10 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, hcon->num_bis = num_bis; memcpy(hcon->bis, bis, num_bis); + hcon->conn_timeout = msecs_to_jiffies(qos->bcast.timeout * 10); } - return hci_le_big_create_sync_pending(hdev); + return hci_connect_big_sync(hdev, hcon); } static void create_big_complete(struct hci_dev *hdev, void *data, int err) @@ -2386,7 +2228,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, * the start periodic advertising and create BIG commands have * been queued */ - hci_conn_hash_list_state(hdev, bis_mark_per_adv, 
ISO_LINK, + hci_conn_hash_list_state(hdev, bis_mark_per_adv, BIS_LINK, BT_BOUND, &data); /* Queue start periodic advertising and create BIG */ @@ -3072,6 +2914,7 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, const struct sockcm_cookie *sockc) { struct sock *sk = skb ? skb->sk : NULL; + int key; /* This shall be called on a single skb of those generated by user * sendmsg(), and only when the sendmsg() does not return error to @@ -3087,13 +2930,16 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); + if (sk->sk_type == SOCK_STREAM) + key = atomic_add_return(key_offset, &sk->sk_tskey); + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { skb_shinfo(skb)->tskey = sockc->ts_opt_id; } else { - int key = atomic_add_return(key_offset, &sk->sk_tskey); - + if (sk->sk_type != SOCK_STREAM) + key = atomic_inc_return(&sk->sk_tskey); skb_shinfo(skb)->tskey = key - 1; } } @@ -3114,7 +2960,8 @@ void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) * TODO: SCO support without flowctl (needs to be done in drivers) */ switch (conn->type) { - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: case ACL_LINK: case LE_LINK: break; @@ -3186,3 +3033,27 @@ void hci_conn_tx_dequeue(struct hci_conn *conn) kfree_skb(skb); } + +u8 *hci_conn_key_enc_size(struct hci_conn *conn) +{ + if (conn->type == ACL_LINK) { + struct link_key *key; + + key = hci_find_link_key(conn->hdev, &conn->dst); + if (!key) + return NULL; + + return &key->pin_len; + } else if (conn->type == LE_LINK) { + struct smp_ltk *ltk; + + ltk = hci_find_ltk(conn->hdev, &conn->dst, conn->dst_type, + conn->role); + if (!ltk) + return NULL; + + return &ltk->enc_size; + } + + return NULL; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5eb0600bbd03cc..af30a420bab75a 100644 --- a/net/bluetooth/hci_core.c +++ 
b/net/bluetooth/hci_core.c @@ -1877,10 +1877,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); - if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) hdev->adv_monitors_cnt--; - mgmt_adv_monitor_removed(hdev, monitor->handle); - } kfree(monitor); } @@ -2487,6 +2485,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + mutex_init(&hdev->mgmt_pending_lock); ida_init(&hdev->unset_handle_ida); @@ -2898,12 +2897,13 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) break; case HCI_ACLDATA_PKT: /* Detect if ISO packet has been sent as ACL */ - if (hci_conn_num(hdev, ISO_LINK)) { + if (hci_conn_num(hdev, CIS_LINK) || + hci_conn_num(hdev, BIS_LINK)) { __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); __u8 type; type = hci_conn_lookup_type(hdev, hci_handle(handle)); - if (type == ISO_LINK) + if (type == CIS_LINK || type == BIS_LINK) hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; } break; @@ -3345,7 +3345,8 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) case LE_LINK: cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: cnt = hdev->iso_mtu ? hdev->iso_cnt : hdev->le_mtu ? 
hdev->le_cnt : hdev->acl_cnt; break; @@ -3359,7 +3360,7 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) } static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, - int *quote) + __u8 type2, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn = NULL, *c; @@ -3371,7 +3372,8 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { - if (c->type != type || skb_queue_empty(&c->data_q)) + if ((c->type != type && c->type != type2) || + skb_queue_empty(&c->data_q)) continue; if (c->state != BT_CONNECTED && c->state != BT_CONFIG) @@ -3403,23 +3405,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) bt_dev_err(hdev, "link tx timeout"); - rcu_read_lock(); + hci_dev_lock(hdev); /* Kill stalled connections */ - list_for_each_entry_rcu(c, &h->list, list) { + list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); - /* hci_disconnect might sleep, so, we have to release - * the RCU read lock before calling it. - */ - rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); - rcu_read_lock(); } } - rcu_read_unlock(); + hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, @@ -3579,7 +3576,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) else cnt = &hdev->sco_cnt; - while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) { + while (*cnt && (conn = hci_low_sent(hdev, type, type, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_conn_frame(hdev, conn, skb); @@ -3707,12 +3704,14 @@ static void hci_sched_iso(struct hci_dev *hdev) BT_DBG("%s", hdev->name); - if (!hci_conn_num(hdev, ISO_LINK)) + if (!hci_conn_num(hdev, CIS_LINK) && + !hci_conn_num(hdev, BIS_LINK)) return; cnt = hdev->iso_pkts ? 
&hdev->iso_cnt : hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, &quote))) { + while (*cnt && (conn = hci_low_sent(hdev, CIS_LINK, BIS_LINK, + &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_conn_frame(hdev, conn, skb); @@ -4057,10 +4056,13 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) return; } - err = hci_send_frame(hdev, skb); - if (err < 0) { - hci_cmd_sync_cancel_sync(hdev, -err); - return; + if (hci_skb_opcode(skb) != HCI_OP_NOP) { + err = hci_send_frame(hdev, skb); + if (err < 0) { + hci_cmd_sync_cancel_sync(hdev, -err); + return; + } + atomic_dec(&hdev->cmd_cnt); } if (hdev->req_status == HCI_REQ_PEND && @@ -4068,8 +4070,6 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(hdev->req_skb); hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); } - - atomic_dec(&hdev->cmd_cnt); } static void hci_cmd_work(struct work_struct *work) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1d8616f2e740ae..66052d6aaa1d50 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -739,10 +739,17 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, handle); conn->enc_key_size = 0; } else { + u8 *key_enc_size = hci_conn_key_enc_size(conn); + conn->enc_key_size = rp->key_size; status = 0; - if (conn->enc_key_size < hdev->min_enc_key_size) { + /* Attempt to check if the key size is too small or if it has + * been downgraded from the last time it was stored as part of + * the link_key. 
+ */ + if (conn->enc_key_size < hdev->min_enc_key_size || + (key_enc_size && conn->enc_key_size < *key_enc_size)) { /* As slave role, the conn->state has been set to * BT_CONNECTED and l2cap conn req might not be received * yet, at this moment the l2cap layer almost does @@ -755,6 +762,10 @@ static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, clear_bit(HCI_CONN_ENCRYPT, &conn->flags); clear_bit(HCI_CONN_AES_CCM, &conn->flags); } + + /* Update the key encryption size with the connection one */ + if (key_enc_size && *key_enc_size != conn->enc_key_size) + *key_enc_size = conn->enc_key_size; } hci_encrypt_cfm(conn, status); @@ -3065,6 +3076,34 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, hci_dev_unlock(hdev); } +static int hci_read_enc_key_size(struct hci_dev *hdev, struct hci_conn *conn) +{ + struct hci_cp_read_enc_key_size cp; + u8 *key_enc_size = hci_conn_key_enc_size(conn); + + if (!read_key_size_capable(hdev)) { + conn->enc_key_size = HCI_LINK_KEY_SIZE; + return -EOPNOTSUPP; + } + + bt_dev_dbg(hdev, "hcon %p", conn); + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + + /* If the key enc_size is already known, use it as conn->enc_key_size, + * otherwise use hdev->min_enc_key_size so the likes of + * l2cap_check_enc_key_size don't fail while waiting for + * HCI_OP_READ_ENC_KEY_SIZE response. 
+ */ + if (key_enc_size && *key_enc_size) + conn->enc_key_size = *key_enc_size; + else + conn->enc_key_size = hdev->min_enc_key_size; + + return hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); +} + static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3157,23 +3196,11 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) && ev->link_type == ACL_LINK) { struct link_key *key; - struct hci_cp_read_enc_key_size cp; key = hci_find_link_key(hdev, &ev->bdaddr); if (key) { set_bit(HCI_CONN_ENCRYPT, &conn->flags); - - if (!read_key_size_capable(hdev)) { - conn->enc_key_size = HCI_LINK_KEY_SIZE; - } else { - cp.handle = cpu_to_le16(conn->handle); - if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, - sizeof(cp), &cp)) { - bt_dev_err(hdev, "sending read key size failed"); - conn->enc_key_size = HCI_LINK_KEY_SIZE; - } - } - + hci_read_enc_key_size(hdev, conn); hci_encrypt_cfm(conn, ev->status); } } @@ -3612,24 +3639,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, /* Try reading the encryption key size for encrypted ACL links */ if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { - struct hci_cp_read_enc_key_size cp; - - /* Only send HCI_Read_Encryption_Key_Size if the - * controller really supports it. If it doesn't, assume - * the default size (16). 
- */ - if (!read_key_size_capable(hdev)) { - conn->enc_key_size = HCI_LINK_KEY_SIZE; + if (hci_read_enc_key_size(hdev, conn)) goto notify; - } - - cp.handle = cpu_to_le16(conn->handle); - if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, - sizeof(cp), &cp)) { - bt_dev_err(hdev, "sending read key size failed"); - conn->enc_key_size = HCI_LINK_KEY_SIZE; - goto notify; - } goto unlock; } @@ -3793,7 +3804,7 @@ static void hci_unbound_cis_failed(struct hci_dev *hdev, u8 cig, u8 status) lockdep_assert_held(&hdev->lock); list_for_each_entry_safe(conn, tmp, &hdev->conn_hash.list, list) { - if (conn->type != ISO_LINK || !bacmp(&conn->dst, BDADDR_ANY) || + if (conn->type != CIS_LINK || conn->state == BT_OPEN || conn->iso_qos.ucast.cig != cig) continue; @@ -4456,7 +4467,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, break; - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: if (hdev->iso_pkts) { hdev->iso_cnt += count; if (hdev->iso_cnt > hdev->iso_pkts) @@ -6160,11 +6172,12 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * event or send an immediate device found event if the data * should not be stored for later. */ - if (!ext_adv && !has_pending_adv_report(hdev)) { + if (!has_pending_adv_report(hdev)) { /* If the report will trigger a SCAN_REQ store it for * later merging. */ - if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + if (!ext_adv && (type == LE_ADV_IND || + type == LE_ADV_SCAN_IND)) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, rssi, flags, data, len); return; @@ -6339,6 +6352,17 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, info->secondary_phy &= 0x1f; } + /* Check if PA Sync is pending and if the hci_conn SID has not + * been set update it. 
+ */ + if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_create_pa_sync(hdev); + if (conn && conn->sid == HCI_SID_INVALID) + conn->sid = info->sid; + } + if (legacy_evt_type != LE_ADV_INVALID) { process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, @@ -6377,8 +6401,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PA_SYNC); - conn = hci_conn_hash_lookup_sid(hdev, ev->sid, &ev->bdaddr, - ev->bdaddr_type); + conn = hci_conn_hash_lookup_create_pa_sync(hdev); if (!conn) { bt_dev_err(hdev, "Unable to find connection for dst %pMR sid 0x%2.2x", @@ -6391,7 +6414,8 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, conn->sync_handle = le16_to_cpu(ev->handle); conn->sid = HCI_SID_INVALID; - mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, BIS_LINK, + &flags); if (!(mask & HCI_LM_ACCEPT)) { hci_le_pa_term_sync(hdev, ev->handle); goto unlock; @@ -6401,7 +6425,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, goto unlock; /* Add connection to indicate PA sync event */ - pa_sync = hci_conn_add_unset(hdev, ISO_LINK, BDADDR_ANY, + pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY, HCI_ROLE_SLAVE); if (IS_ERR(pa_sync)) @@ -6417,9 +6441,6 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending PA sync command */ - hci_pa_create_sync_pending(hdev); - hci_dev_unlock(hdev); } @@ -6435,7 +6456,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, BIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) goto unlock; @@ -6719,7 +6740,7 @@ static void hci_le_cis_estabilished_evt(struct hci_dev 
*hdev, void *data, goto unlock; } - if (conn->type != ISO_LINK) { + if (conn->type != CIS_LINK) { bt_dev_err(hdev, "Invalid connection link type handle 0x%4.4x", handle); @@ -6837,7 +6858,7 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, if (!acl) goto unlock; - mask = hci_proto_connect_ind(hdev, &acl->dst, ISO_LINK, &flags); + mask = hci_proto_connect_ind(hdev, &acl->dst, CIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; @@ -6845,8 +6866,8 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, cis = hci_conn_hash_lookup_handle(hdev, cis_handle); if (!cis) { - cis = hci_conn_add(hdev, ISO_LINK, &acl->dst, HCI_ROLE_SLAVE, - cis_handle); + cis = hci_conn_add(hdev, CIS_LINK, &acl->dst, + HCI_ROLE_SLAVE, cis_handle); if (IS_ERR(cis)) { hci_le_reject_cis(hdev, ev->cis_handle); goto unlock; @@ -6931,7 +6952,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + if (!hci_le_ev_skb_pull(hdev, skb, HCI_EVT_LE_BIG_SYNC_ESTABLISHED, flex_array_size(ev, bis, ev->num_bis))) return; @@ -6961,7 +6982,7 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "ignore too large handle %u", handle); continue; } - bis = hci_conn_add(hdev, ISO_LINK, BDADDR_ANY, + bis = hci_conn_add(hdev, BIS_LINK, BDADDR_ANY, HCI_ROLE_SLAVE, handle); if (IS_ERR(bis)) continue; @@ -7002,9 +7023,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, } unlock: - /* Handle any other pending BIG sync command */ - hci_le_big_create_sync_pending(hdev); - hci_dev_unlock(hdev); } @@ -7020,7 +7038,7 @@ static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, ISO_LINK, &flags); + mask |= hci_proto_connect_ind(hdev, 
BDADDR_ANY, BIS_LINK, &flags); if (!(mask & HCI_LM_ACCEPT)) goto unlock; @@ -7126,8 +7144,8 @@ static const struct hci_le_ev { hci_le_create_big_complete_evt, sizeof(struct hci_evt_le_create_big_complete), HCI_MAX_EVENT_SIZE), - /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABILISHED] */ - HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABILISHED, + /* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABLISHED] */ + HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABLISHED, hci_le_big_sync_established_evt, sizeof(struct hci_evt_le_big_sync_estabilished), HCI_MAX_EVENT_SIZE), @@ -7150,7 +7168,8 @@ static void hci_le_meta_evt(struct hci_dev *hdev, void *data, /* Only match event if command OGF is for LE */ if (hdev->req_skb && - hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 && + (hci_opcode_ogf(hci_skb_opcode(hdev->req_skb)) == 0x08 || + hci_skb_opcode(hdev->req_skb) == HCI_OP_NOP) && hci_skb_event(hdev->req_skb) == ev->subevent) { *opcode = hci_skb_opcode(hdev->req_skb); hci_req_cmd_complete(hdev, *opcode, 0x00, req_complete, @@ -7506,8 +7525,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) goto done; } + hci_dev_lock(hdev); kfree_skb(hdev->recv_event); hdev->recv_event = skb_clone(skb, GFP_KERNEL); + hci_dev_unlock(hdev); event = hdr->evt; if (!event) { diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 609b035e5c9041..83de3847c8eaf7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1559,7 +1559,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; - u8 ad[4 + 3]; + u8 ad[HCI_MAX_EXT_AD_LENGTH]; + u8 len; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement @@ -1585,8 +1586,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); - 
eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); - hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + memcpy(ad + len, adv->adv_data, adv->adv_data_len); + hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, + ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } @@ -1603,8 +1606,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); - /* Create an instance if that could not be found */ - if (!adv) { + if (adv) { + /* Turn it into periodic advertising */ + adv->periodic = true; + adv->per_adv_data_len = data_len; + if (data) + memcpy(adv->per_adv_data, data, data_len); + adv->flags = flags; + } else if (!adv) { + /* Create an instance if that could not be found */ adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, @@ -1812,7 +1822,8 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) return 0; } - len = eir_create_adv_data(hdev, instance, pdu->data); + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); pdu->length = len; pdu->handle = adv ? 
adv->handle : instance; @@ -1843,7 +1854,7 @@ static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && @@ -2693,16 +2704,16 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev) /* Force address filtering if PA Sync is in progress */ if (hci_dev_test_flag(hdev, HCI_PA_SYNC)) { - struct hci_cp_le_pa_create_sync *sent; + struct hci_conn *conn; - sent = hci_sent_cmd_data(hdev, HCI_OP_LE_PA_CREATE_SYNC); - if (sent) { + conn = hci_conn_hash_lookup_create_pa_sync(hdev); + if (conn) { struct conn_params pa; memset(&pa, 0, sizeof(pa)); - bacpy(&pa.addr, &sent->addr); - pa.addr_type = sent->addr_type; + bacpy(&pa.addr, &conn->dst); + pa.addr_type = conn->dst_type; /* Clear first since there could be addresses left * behind. @@ -2860,7 +2871,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type, if (sent) { struct hci_conn *conn; - conn = hci_conn_hash_lookup_ba(hdev, ISO_LINK, + conn = hci_conn_hash_lookup_ba(hdev, BIS_LINK, &sent->bdaddr); if (conn) { struct bt_iso_qos *qos = &conn->iso_qos; @@ -5477,7 +5488,7 @@ static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, if (conn->type == LE_LINK) return hci_le_connect_cancel_sync(hdev, conn, reason); - if (conn->type == ISO_LINK) { + if (conn->type == CIS_LINK) { /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E * page 1857: * @@ -5490,9 +5501,10 @@ static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn, return hci_disconnect_sync(hdev, conn, reason); /* CIS with no Create CIS sent have nothing to cancel */ - if (bacmp(&conn->dst, BDADDR_ANY)) - return HCI_ERROR_LOCAL_HOST_TERM; + return HCI_ERROR_LOCAL_HOST_TERM; + } + if (conn->type == BIS_LINK) { /* There is no way to cancel a BIS 
without terminating the BIG * which is done later on connection cleanup. */ @@ -5554,9 +5566,12 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, { struct hci_cp_reject_conn_req cp; - if (conn->type == ISO_LINK) + if (conn->type == CIS_LINK) return hci_le_reject_cis_sync(hdev, conn, reason); + if (conn->type == BIS_LINK) + return -EINVAL; + if (conn->type == SCO_LINK || conn->type == ESCO_LINK) return hci_reject_sco_sync(hdev, conn, reason); @@ -6895,3 +6910,182 @@ int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); } + +static void create_pa_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + struct hci_conn *pa_sync; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + hci_dev_lock(hdev); + + hci_dev_clear_flag(hdev, HCI_PA_SYNC); + + if (!hci_conn_valid(hdev, conn)) + clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + if (!err) + goto unlock; + + /* Add connection to indicate PA sync error */ + pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY, + HCI_ROLE_SLAVE); + + if (IS_ERR(pa_sync)) + goto unlock; + + set_bit(HCI_CONN_PA_SYNC_FAILED, &pa_sync->flags); + + /* Notify iso layer */ + hci_connect_cfm(pa_sync, bt_status(err)); + +unlock: + hci_dev_unlock(hdev); +} + +static int hci_le_pa_create_sync(struct hci_dev *hdev, void *data) +{ + struct hci_cp_le_pa_create_sync cp; + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + if (conn->sync_handle != HCI_SYNC_HANDLE_INVALID) + return -EINVAL; + + if (hci_dev_test_and_set_flag(hdev, HCI_PA_SYNC)) + return -EBUSY; + + /* Stop scanning if SID has not been set and active scanning is enabled + * so we use passive scanning which will be scanning using the allow + * list programmed to contain only the connection 
address. + */ + if (conn->sid == HCI_SID_INVALID && + hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_scan_disable_sync(hdev); + hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } + + /* Mark HCI_CONN_CREATE_PA_SYNC so hci_update_passive_scan_sync can + * program the address in the allow list so PA advertisements can be + * received. + */ + set_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); + + hci_update_passive_scan_sync(hdev); + + /* SID has not been set listen for HCI_EV_LE_EXT_ADV_REPORT to update + * it. + */ + if (conn->sid == HCI_SID_INVALID) + __hci_cmd_sync_status_sk(hdev, HCI_OP_NOP, 0, NULL, + HCI_EV_LE_EXT_ADV_REPORT, + conn->conn_timeout, NULL); + + memset(&cp, 0, sizeof(cp)); + cp.options = qos->bcast.options; + cp.sid = conn->sid; + cp.addr_type = conn->dst_type; + bacpy(&cp.addr, &conn->dst); + cp.skip = cpu_to_le16(qos->bcast.skip); + cp.sync_timeout = cpu_to_le16(qos->bcast.sync_timeout); + cp.sync_cte_type = qos->bcast.sync_cte_type; + + /* The spec allows only one pending LE Periodic Advertising Create + * Sync command at a time so we forcefully wait for PA Sync Established + * event since cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2493: + * + * If the Host issues this command when another HCI_LE_Periodic_ + * Advertising_Create_Sync command is pending, the Controller shall + * return the error code Command Disallowed (0x0C). 
+ */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_PA_CREATE_SYNC, + sizeof(cp), &cp, + HCI_EV_LE_PA_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_CREATE_SYNC_CANCEL, + 0, NULL, HCI_CMD_TIMEOUT); + + return err; +} + +int hci_connect_pa_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_pa_create_sync, conn, + create_pa_complete); +} + +static void create_big_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + if (hci_conn_valid(hdev, conn)) + clear_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); +} + +static int hci_le_big_create_sync(struct hci_dev *hdev, void *data) +{ + DEFINE_FLEX(struct hci_cp_le_big_create_sync, cp, bis, num_bis, 0x11); + struct hci_conn *conn = data; + struct bt_iso_qos *qos = &conn->iso_qos; + int err; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + set_bit(HCI_CONN_CREATE_BIG_SYNC, &conn->flags); + + memset(cp, 0, sizeof(*cp)); + cp->handle = qos->bcast.big; + cp->sync_handle = cpu_to_le16(conn->sync_handle); + cp->encryption = qos->bcast.encryption; + memcpy(cp->bcode, qos->bcast.bcode, sizeof(cp->bcode)); + cp->mse = qos->bcast.mse; + cp->timeout = cpu_to_le16(qos->bcast.timeout); + cp->num_bis = conn->num_bis; + memcpy(cp->bis, conn->bis, conn->num_bis); + + /* The spec allows only one pending LE BIG Create Sync command at + * a time, so we forcefully wait for BIG Sync Established event since + * cmd_work can only schedule one command at a time. + * + * BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E + * page 2586: + * + * If the Host sends this command when the Controller is in the + * process of synchronizing to any BIG, i.e. the HCI_LE_BIG_Sync_ + * Established event has not been generated, the Controller shall + * return the error code Command Disallowed (0x0C). 
+ */ + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_BIG_CREATE_SYNC, + struct_size(cp, bis, cp->num_bis), cp, + HCI_EVT_LE_BIG_SYNC_ESTABLISHED, + conn->conn_timeout, NULL); + if (err == -ETIMEDOUT) + hci_le_big_terminate_sync(hdev, cp->handle); + + return err; +} + +int hci_connect_big_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_big_create_sync, conn, + create_big_complete); +} diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 3501a991f1c64e..5389af86bdae4f 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -941,7 +941,7 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr, iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type; - if (sa->iso_bc->bc_sid > 0x0f) + if (sa->iso_bc->bc_sid > 0x0f && sa->iso_bc->bc_sid != HCI_SID_INVALID) return -EINVAL; iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid; @@ -1462,14 +1462,13 @@ static void iso_conn_big_sync(struct sock *sk) lock_sock(sk); if (!test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, iso_pi(sk)->conn->hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, iso_pi(sk)->conn->hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) - bt_dev_err(hdev, "hci_le_big_create_sync: %d", - err); + bt_dev_err(hdev, "hci_big_create_sync: %d", err); } release_sock(sk); @@ -1922,7 +1921,7 @@ static void iso_conn_ready(struct iso_conn *conn) hcon); } else if (test_bit(HCI_CONN_BIG_SYNC_FAILED, &hcon->flags)) { ev = hci_recv_event_data(hcon->hdev, - HCI_EVT_LE_BIG_SYNC_ESTABILISHED); + HCI_EVT_LE_BIG_SYNC_ESTABLISHED); /* Get reference to PA sync parent socket, if it exists */ parent = iso_get_sock(&hcon->src, &hcon->dst, @@ -2030,6 +2029,9 @@ static bool iso_match_sid(struct sock *sk, void *data) { struct hci_ev_le_pa_sync_established *ev = data; + if 
(iso_pi(sk)->bc_sid == HCI_SID_INVALID) + return true; + return ev->sid == iso_pi(sk)->bc_sid; } @@ -2076,8 +2078,10 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (ev1) { sk = iso_get_sock(&hdev->bdaddr, bdaddr, BT_LISTEN, iso_match_sid, ev1); - if (sk && !ev1->status) + if (sk && !ev1->status) { iso_pi(sk)->sync_handle = le16_to_cpu(ev1->handle); + iso_pi(sk)->bc_sid = ev1->sid; + } goto done; } @@ -2113,12 +2117,11 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) if (!test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags) && !test_and_set_bit(BT_SK_BIG_SYNC, &iso_pi(sk)->flags)) { - err = hci_le_big_create_sync(hdev, - hcon, - &iso_pi(sk)->qos, - iso_pi(sk)->sync_handle, - iso_pi(sk)->bc_num_bis, - iso_pi(sk)->bc_bis); + err = hci_conn_big_create_sync(hdev, hcon, + &iso_pi(sk)->qos, + iso_pi(sk)->sync_handle, + iso_pi(sk)->bc_num_bis, + iso_pi(sk)->bc_bis); if (err) { bt_dev_err(hdev, "hci_le_big_create_sync: %d", err); @@ -2205,7 +2208,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags) static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) { - if (hcon->type != ISO_LINK) { + if (hcon->type != CIS_LINK && hcon->type != BIS_LINK) { if (hcon->type != LE_LINK) return; @@ -2246,7 +2249,7 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status) static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason) { - if (hcon->type != ISO_LINK) + if (hcon->type != CIS_LINK && hcon->type != BIS_LINK) return; BT_DBG("hcon %p reason %d", hcon, reason); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c7b66b2ea9f210..a5bde5db58efcb 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1411,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn) sizeof(req), &req); } -static bool l2cap_check_enc_key_size(struct hci_conn *hcon) +static bool l2cap_check_enc_key_size(struct hci_conn *hcon, + struct l2cap_chan *chan) { /* The 
minimum encryption key size needs to be enforced by the * host stack before establishing any L2CAP connections. The @@ -1425,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon) int min_key_size = hcon->hdev->min_enc_key_size; /* On FIPS security level, key size must be 16 bytes */ - if (hcon->sec_level == BT_SECURITY_FIPS) + if (chan->sec_level == BT_SECURITY_FIPS) min_key_size = 16; return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || @@ -1453,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan) !__l2cap_no_conn_pending(chan)) return; - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -1528,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) continue; } - if (l2cap_check_enc_key_size(conn->hcon)) + if (l2cap_check_enc_key_size(conn->hcon, chan)) l2cap_start_connection(chan); else l2cap_chan_close(chan, ECONNREFUSED); @@ -3991,7 +3992,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && - !hci_conn_check_link_mode(conn->hcon)) { + (!hci_conn_check_link_mode(conn->hcon) || + !l2cap_check_enc_key_size(conn->hcon, pchan))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; @@ -4868,7 +4870,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_LE_AUTHENTICATION; + result = pchan->sec_level == BT_SECURITY_MEDIUM ? 
+ L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION; chan = NULL; goto response_unlock; } @@ -7351,7 +7354,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status && l2cap_check_enc_key_size(hcon)) + if (!status && l2cap_check_enc_key_size(hcon, chan)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -7361,7 +7364,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) struct l2cap_conn_rsp rsp; __u16 res, stat; - if (!status && l2cap_check_enc_key_size(hcon)) { + if (!status && l2cap_check_enc_key_size(hcon, chan)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; @@ -7414,6 +7417,9 @@ static int l2cap_recv_frag(struct l2cap_conn *conn, struct sk_buff *skb, return -ENOMEM; /* Init rx_len */ conn->rx_len = len; + + skb_set_delivery_time(conn->rx_skb, skb->tstamp, + skb->tstamp_type); } /* Copy as much as the rx_skb can hold */ @@ -7538,8 +7544,24 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (skb->len > len) { BT_ERR("Frame is too long (len %u, expected len %d)", skb->len, len); + /* PTS test cases L2CAP/COS/CED/BI-14-C and BI-15-C + * (Multiple Signaling Command in one PDU, Data + * Truncated, BR/EDR) send a C-frame to the IUT with + * PDU Length set to 8 and Channel ID set to the + * correct signaling channel for the logical link. + * The Information payload contains one L2CAP_ECHO_REQ + * packet with Data Length set to 0 with 0 octets of + * echo data and one invalid command packet due to + * data truncated in PDU but present in HCI packet. + * + * Shorter the socket buffer to the PDU length to + * allow to process valid commands from the PDU before + * setting the socket unreliable. 
+ */ + skb->len = len; + l2cap_recv_frame(conn, skb); l2cap_conn_unreliable(conn, ECOMM); - goto drop; + goto unlock; } /* Append fragment into frame (with header) */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c1e1e529e26cc2..d540f7b4f75fbf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1447,22 +1447,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -1476,8 +1471,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) if (cmd->cmd_complete) { cmd->cmd_complete(cmd, match->mgmt_status); - mgmt_pending_remove(cmd); - return; } @@ -1486,13 +1479,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } @@ -1532,7 +1525,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); 
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } @@ -1707,7 +1700,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -1943,8 +1936,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, NULL); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, + cmd_status_rsp, &mgmt_err); return; } @@ -1954,7 +1947,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -2074,12 +2067,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp, + &status); return; } - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -2138,7 +2131,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) struct sock *sk = cmd->sk; if (status) { - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); return; } @@ -2566,7 +2559,8 @@ static int mgmt_hci_cmd_sync(struct sock *sk, struct hci_dev *hdev, struct mgmt_pending_cmd *cmd; int err; - if (len < sizeof(*cp)) + if (len != (offsetof(struct mgmt_cp_hci_cmd_sync, params) + + le16_to_cpu(cp->params_len))) 
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_HCI_CMD_SYNC, MGMT_STATUS_INVALID_PARAMS); @@ -2637,7 +2631,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); @@ -3221,7 +3215,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, static u8 link_to_bdaddr(u8 link_type, u8 addr_type) { switch (link_type) { - case ISO_LINK: + case CIS_LINK: + case BIS_LINK: case LE_LINK: switch (addr_type) { case ADDR_LE_DEV_PUBLIC: @@ -3425,7 +3420,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ @@ -5106,24 +5101,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + __le16 handle) { struct mgmt_ev_adv_monitor_removed ev; - struct mgmt_pending_cmd *cmd; - struct sock *sk_skip = NULL; - struct mgmt_cp_remove_adv_monitor *cp; - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (cmd) { - cp = cmd->param; + ev.monitor_handle = handle; - if (cp->monitor_handle) - sk_skip = cmd->sk; - } - - ev.monitor_handle = cpu_to_le16(handle); - - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ 
-5194,7 +5179,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -5225,8 +5210,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || - pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } @@ -5396,8 +5380,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp; - if (status == -ECANCELED || - cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) + if (status == -ECANCELED) return; hci_dev_lock(hdev); @@ -5406,12 +5389,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, rp.monitor_handle = cp->monitor_handle; - if (!status) + if (!status) { + mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle); hci_update_passive_scan(hdev); + } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", @@ -5421,10 +5406,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - - if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) - return -ECANCELED; - struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); @@ -5443,14 +5424,13 @@ static int 
remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; @@ -5460,7 +5440,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, mgmt_remove_adv_monitor_complete); if (err) { - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; @@ -5790,7 +5770,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6011,7 +5991,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6236,7 +6216,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) u8 status = mgmt_status(err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, cmd_status_rsp, &status); return; } @@ -6246,7 +6226,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) else hci_dev_clear_flag(hdev, HCI_ADVERTISING); - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + 
mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6590,7 +6570,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -6727,7 +6707,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -7174,7 +7154,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7334,7 +7314,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7506,11 +7486,16 @@ static void add_device_complete(struct hci_dev *hdev, void *data, int err) struct mgmt_cp_add_device *cp = cmd->param; if (!err) { + struct hci_conn_params *params; + + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); device_flags_changed(NULL, hdev, &cp->addr.bdaddr, cp->addr.type, hdev->conn_flags, - PTR_UINT(cmd->user_data)); + params ? 
params->flags : 0); } mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE, @@ -7613,8 +7598,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - cmd->user_data = UINT_PTR(current_flags); - err = hci_cmd_sync_queue(hdev, add_device_sync, cmd, add_device_complete); if (err < 0) { @@ -8581,10 +8564,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); @@ -8772,10 +8755,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, cp->instance); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); } else { - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } @@ -8922,10 +8905,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9084,10 +9067,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data, rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + 
mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9359,7 +9342,7 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9397,7 +9380,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err) hci_update_passive_scan(hdev); } - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); new_settings(hdev, match.sk); @@ -9412,7 +9396,8 @@ void __mgmt_power_off(struct hci_dev *hdev) struct cmd_lookup match = { NULL, hdev }; u8 zero_cod[] = { 0, 0, 0 }; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. 
Otherwise use @@ -9426,7 +9411,7 @@ void __mgmt_power_off(struct hci_dev *hdev) else match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -9667,7 +9652,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); - mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) @@ -9723,8 +9707,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); + mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true, + unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) @@ -9917,7 +9901,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, cmd_status_rsp, &mgmt_err); return; } @@ -9927,8 +9911,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, - &match); + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, + settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -9952,9 +9936,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); - 
mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup, + &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index e5ff65e424b5b4..a88a07da394734 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { - struct mgmt_pending_cmd *cmd; + struct mgmt_pending_cmd *cmd, *tmp; + + mutex_lock(&hdev->mgmt_pending_lock); - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; - if (cmd->opcode == opcode) + + if (cmd->opcode == opcode) { + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; + } } + mutex_unlock(&hdev->mgmt_pending_lock); + return NULL; } -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; + mutex_lock(&hdev->mgmt_pending_lock); + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; + if (remove) + list_del(&cmd->list); + cb(cmd, data); + + if (remove) + mgmt_pending_free(cmd); } + + mutex_unlock(&hdev->mgmt_pending_lock); } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, @@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = hdev->id; + 
cmd->hdev = hdev; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { @@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; + mutex_lock(&hdev->mgmt_pending_lock); list_add_tail(&cmd->list, &hdev->mgmt_pending); + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; } @@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd) void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { + mutex_lock(&cmd->hdev->mgmt_pending_lock); list_del(&cmd->list); + mutex_unlock(&cmd->hdev->mgmt_pending_lock); + mgmt_pending_free(cmd); } @@ -304,7 +326,7 @@ void mgmt_mesh_foreach(struct hci_dev *hdev, { struct mgmt_mesh_tx *mesh_tx, *tmp; - list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) { + list_for_each_entry_safe(mesh_tx, tmp, &hdev->mesh_pending, list) { if (!sk || mesh_tx->sk == sk) cb(mesh_tx, data); } diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index f2ba994ab1d847..024e51dd693756 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -33,7 +33,7 @@ struct mgmt_mesh_tx { struct mgmt_pending_cmd { struct list_head list; u16 opcode; - int index; + struct hci_dev *hdev; void *param; size_t param_len; struct sock *sk; @@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 98aea5485aaef4..a8c67035e23c00 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = { * ipt_REJECT needs it. 
Future netfilter modules might * require us to fill additional fields. */ -static const u32 br_dst_default_metrics[RTAX_MAX] = { - [RTAX_MTU - 1] = 1500, -}; - void br_netfilter_rtable_init(struct net_bridge *br) { struct rtable *rt = &br->fake_rtable; rcuref_init(&rt->dst.__rcuref, 1); rt->dst.dev = br->dev; - dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + dst_init_metrics(&rt->dst, br->metrics, false); + dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu); rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; rt->dst.ops = &fake_dst_ops; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d5b3c5936a79e1..4715a8d6dc3266 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -505,6 +505,7 @@ struct net_bridge { struct rtable fake_rtable; struct rt6_info fake_rt6_info; }; + u32 metrics[RTAX_MAX]; #endif u16 group_fwd_mask; u16 group_fwd_mask_required; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d9a69ec9affe59..939a3aa78d5c46 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -715,8 +715,8 @@ static int br_vlan_add_existing(struct net_bridge *br, u16 flags, bool *changed, struct netlink_ext_ack *extack) { - bool would_change = __vlan_flags_would_change(vlan, flags); bool becomes_brentry = false; + bool would_change = false; int err; if (!br_vlan_is_brentry(vlan)) { @@ -725,6 +725,8 @@ static int br_vlan_add_existing(struct net_bridge *br, return -EINVAL; becomes_brentry = true; + } else { + would_change = __vlan_flags_would_change(vlan, flags); } /* Master VLANs that aren't brentries weren't notified before, diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 816bb0fde718ed..6482de4d875092 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct ip_fraglist_iter iter; struct sk_buff *frag; - if (first_len - 
hlen > mtu || - skb_headroom(skb) < ll_rs) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < ll_rs) goto slow_path; skb_walk_frags(skb, frag) { - if (frag->len > mtu || - skb_headroom(frag) < hlen + ll_rs) + if (frag->len > mtu) goto blackhole; - if (skb_shared(frag)) + if (skb_shared(frag) || + skb_headroom(frag) < hlen + ll_rs) goto slow_path; } diff --git a/net/can/bcm.c b/net/can/bcm.c index 0bca1b9b3f7072..6bc1cc4c94c5ef 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,7 @@ struct bcm_op { struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; + spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */ }; struct bcm_sock { @@ -217,7 +219,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); - list_for_each_entry(op, &bo->rx_ops, list) { + rcu_read_lock(); + + list_for_each_entry_rcu(op, &bo->rx_ops, list) { unsigned long reduction; @@ -273,6 +277,9 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); + + rcu_read_unlock(); + return 0; } #endif /* CONFIG_PROC_FS */ @@ -285,13 +292,18 @@ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; - struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; + struct canfd_frame *cf; int err; /* no target device? => exit */ if (!op->ifindex) return; + /* read currframe under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + cf = op->frames + op->cfsiz * op->currframe; + spin_unlock_bh(&op->bcm_tx_lock); + dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? 
*/ @@ -312,6 +324,10 @@ static void bcm_can_tx(struct bcm_op *op) skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); + + /* update currframe and count under lock protection */ + spin_lock_bh(&op->bcm_tx_lock); + if (!err) op->frames_abs++; @@ -320,6 +336,11 @@ static void bcm_can_tx(struct bcm_op *op) /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; + + if (op->count > 0) + op->count--; + + spin_unlock_bh(&op->bcm_tx_lock); out: dev_put(dev); } @@ -430,7 +451,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; + bcm_can_tx(op); if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ @@ -445,7 +466,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) bcm_send_to_user(op, &msg_head, NULL, 0); } - bcm_can_tx(op); } else if (op->kt_ival2) { bcm_can_tx(op); @@ -843,7 +863,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, REGMASK(op->can_id), bcm_rx_handler, op); - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -863,7 +883,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return 1; /* done */ } @@ -956,6 +976,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } op->flags = msg_head->flags; + /* only lock for unlikely count/nframes/currframe changes */ + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX || + op->flags & SETTIMER) { + + spin_lock_bh(&op->bcm_tx_lock); + + if (op->nframes != msg_head->nframes || + op->flags & TX_RESET_MULTI_IDX) { + /* potentially update 
changed nframes */ + op->nframes = msg_head->nframes; + /* restart multiple frame transmission */ + op->currframe = 0; + } + + if (op->flags & SETTIMER) + op->count = msg_head->count; + + spin_unlock_bh(&op->bcm_tx_lock); + } + } else { /* insert new BCM operation for the given can_id */ @@ -963,9 +1004,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, if (!op) return -ENOMEM; + spin_lock_init(&op->bcm_tx_lock); op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; + op->nframes = msg_head->nframes; + + if (op->flags & SETTIMER) + op->count = msg_head->count; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { @@ -1023,22 +1069,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ - if (op->nframes != msg_head->nframes) { - op->nframes = msg_head->nframes; - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - - /* check flags */ - - if (op->flags & TX_RESET_MULTI_IDX) { - /* start multiple frame transmission with index 0 */ - op->currframe = 0; - } - if (op->flags & SETTIMER) { /* set timer values */ - op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); @@ -1055,11 +1087,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->flags |= TX_ANNOUNCE; } - if (op->flags & TX_ANNOUNCE) { + if (op->flags & TX_ANNOUNCE) bcm_can_tx(op); - if (op->count) - op->count--; - } if (op->flags & STARTTIMER) bcm_tx_start_timer(op); @@ -1272,7 +1301,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ - list_del(&op->list); + list_del_rcu(&op->list); bcm_remove_op(op); return err; } diff --git a/net/can/gw.c b/net/can/gw.c index 
ef93293c1fae39..55eccb1c7620c0 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? 
*/ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if 
(gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + 
memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; + + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) - return err; + goto out_free_cf; - if (mod.uid) { + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, 
mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 17226b2341d03d..6fefe7a6876116 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -655,6 +655,7 @@ static int j1939_sk_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); sock_put(sk); 
return 0; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index c5c4eef3a9ff13..0aa21fcbf6ece5 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -2,7 +2,7 @@ config CEPH_LIB tristate "Ceph core library" depends on INET - select LIBCRC32C + select CRC32 select CRYPTO_AES select CRYPTO_CBC select CRYPTO_GCM diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b24afec241382b..6664ea73ccf81b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -220,16 +220,6 @@ void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req, } EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages); -void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); -} -EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist); - #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -297,19 +287,6 @@ static void osd_req_op_cls_request_info_pagelist( ceph_osd_data_pagelist_init(osd_data, pagelist); } -void osd_req_op_cls_request_data_pagelist( - struct ceph_osd_request *osd_req, - unsigned int which, struct ceph_pagelist *pagelist) -{ - struct ceph_osd_data *osd_data; - - osd_data = osd_req_op_data(osd_req, which, cls, request_data); - ceph_osd_data_pagelist_init(osd_data, pagelist); - osd_req->r_ops[which].cls.indata_len += pagelist->length; - osd_req->r_ops[which].indata_len += pagelist->length; -} -EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist); - void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req, unsigned int which, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages) diff --git a/net/core/dev.c b/net/core/dev.c index 0608605cfc242f..2b20aadaf9268d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1518,16 +1518,10 @@ void 
netdev_features_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_features_change); -/** - * netdev_state_change - device changes state - * @dev: device to cause notification - * - * Called to indicate a device has changed state. This function calls - * the notifier chains for netdev_chain and sends a NEWLINK message - * to the routing socket. - */ -void netdev_state_change(struct net_device *dev) +void netif_state_change(struct net_device *dev) { + netdev_ops_assert_locked_or_invisible(dev); + if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { .info.dev = dev, @@ -1538,7 +1532,6 @@ void netdev_state_change(struct net_device *dev) rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); } } -EXPORT_SYMBOL(netdev_state_change); /** * __netdev_notify_peers - notify network peers about existence of @dev, @@ -9200,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. 
- */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9223,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { @@ -10412,7 +10393,7 @@ static void dev_index_release(struct net *net, int ifindex) static bool from_cleanup_net(void) { #ifdef CONFIG_NET_NS - return current == cleanup_net_task; + return current == READ_ONCE(cleanup_net_task); #else return false; #endif @@ -10460,6 +10441,7 @@ static void netdev_sync_lower_features(struct net_device *upper, if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); + netdev_lock_ops(lower); lower->wanted_features &= ~feature; __netdev_update_features(lower); @@ -10468,6 +10450,7 @@ static void netdev_sync_lower_features(struct net_device *upper, &feature, lower->name); else netdev_features_change(lower); + netdev_unlock_ops(lower); } } } @@ -11941,15 +11924,24 @@ void unregister_netdevice_many_notify(struct list_head *head, BUG_ON(dev->reg_state != NETREG_REGISTERED); } - /* If device is running, close it first. */ + /* If device is running, close it first. Start with ops locked... */ list_for_each_entry(dev, head, unreg_list) { - list_add_tail(&dev->close_list, &close_head); - netdev_lock_ops(dev); + if (netdev_need_ops_lock(dev)) { + list_add_tail(&dev->close_list, &close_head); + netdev_lock(dev); + } + } + dev_close_many(&close_head, true); + /* ... now unlock them and go over the rest. 
*/ + list_for_each_entry(dev, head, unreg_list) { + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); + else + list_add_tail(&dev->close_list, &close_head); } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { - netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); @@ -11964,9 +11956,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12159,7 +12151,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90bafb0b1b8c3f..f9a160ab596f3a 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -267,6 +267,29 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. 
+ */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + /** * dev_set_allmulti() - update allmulti count on a device * @dev: device @@ -327,3 +350,19 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return ret; } EXPORT_SYMBOL_GPL(dev_xdp_propagate); + +/** + * netdev_state_change() - device changes state + * @dev: device to cause notification + * + * Called to indicate a device has changed state. This function calls + * the notifier chains for netdev_chain and sends a NEWLINK message + * to the routing socket. + */ +void netdev_state_change(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_state_change(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(netdev_state_change); diff --git a/net/core/devmem.c b/net/core/devmem.c index 6e27a47d049354..2db428ab6b8be4 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -200,6 +200,8 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, refcount_set(&binding->ref, 1); + mutex_init(&binding->lock); + binding->dmabuf = dmabuf; binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); @@ -379,6 +381,11 @@ static void mp_dmabuf_devmem_uninstall(void *mp_priv, xa_for_each(&binding->bound_rxqs, xa_idx, bound_rxq) { if (bound_rxq == rxq) { xa_erase(&binding->bound_rxqs, xa_idx); + if (xa_empty(&binding->bound_rxqs)) { + mutex_lock(&binding->lock); + binding->dev = NULL; + mutex_unlock(&binding->lock); + } break; } } diff --git a/net/core/devmem.h b/net/core/devmem.h index 7fc158d527293b..a1aabc9685cc67 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -20,6 +20,8 @@ struct net_devmem_dmabuf_binding { struct sg_table *sgt; struct net_device *dev; struct gen_pool *chunk_pool; + /* Protect dev */ + struct mutex lock; /* The user holds a ref (via the netlink API) for as long as they 
want * the binding to remain alive. Each page pool using this binding holds diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 4bc64d912a1c00..7af302080a6608 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -257,6 +257,24 @@ static int nla_put_port_range(struct sk_buff *skb, int attrtype, return nla_put(skb, attrtype, sizeof(*range), range); } +static bool fib_rule_iif_match(const struct fib_rule *rule, int iifindex, + const struct flowi *fl) +{ + u8 iif_is_l3_master = READ_ONCE(rule->iif_is_l3_master); + + return iif_is_l3_master ? l3mdev_fib_rule_iif_match(fl, iifindex) : + fl->flowi_iif == iifindex; +} + +static bool fib_rule_oif_match(const struct fib_rule *rule, int oifindex, + const struct flowi *fl) +{ + u8 oif_is_l3_master = READ_ONCE(rule->oif_is_l3_master); + + return oif_is_l3_master ? l3mdev_fib_rule_oif_match(fl, oifindex) : + fl->flowi_oif == oifindex; +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -264,11 +282,11 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, int iifindex, oifindex, ret = 0; iifindex = READ_ONCE(rule->iifindex); - if (iifindex && (iifindex != fl->flowi_iif)) + if (iifindex && !fib_rule_iif_match(rule, iifindex, fl)) goto out; oifindex = READ_ONCE(rule->oifindex); - if (oifindex && (oifindex != fl->flowi_oif)) + if (oifindex && !fib_rule_oif_match(rule, oifindex, fl)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -736,16 +754,20 @@ static int fib_nl2rule_rtnl(struct fib_rule *nlrule, struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); - if (dev) + if (dev) { nlrule->iifindex = dev->ifindex; + nlrule->iif_is_l3_master = netif_is_l3_master(dev); + } } if (tb[FRA_OIFNAME]) { struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); - if (dev) + if (dev) { nlrule->oifindex = dev->ifindex; + nlrule->oif_is_l3_master 
= netif_is_l3_master(dev); + } } return 0; @@ -1336,11 +1358,17 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && - strcmp(dev->name, rule->iifname) == 0) + strcmp(dev->name, rule->iifname) == 0) { WRITE_ONCE(rule->iifindex, dev->ifindex); + WRITE_ONCE(rule->iif_is_l3_master, + netif_is_l3_master(dev)); + } if (rule->oifindex == -1 && - strcmp(dev->name, rule->oifname) == 0) + strcmp(dev->name, rule->oifname) == 0) { WRITE_ONCE(rule->oifindex, dev->ifindex); + WRITE_ONCE(rule->oif_is_l3_master, + netif_is_l3_master(dev)); + } } } @@ -1349,10 +1377,14 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->iifindex == dev->ifindex) + if (rule->iifindex == dev->ifindex) { WRITE_ONCE(rule->iifindex, -1); - if (rule->oifindex == dev->ifindex) + WRITE_ONCE(rule->iif_is_l3_master, false); + } + if (rule->oifindex == dev->ifindex) { WRITE_ONCE(rule->oifindex, -1); + WRITE_ONCE(rule->oif_is_l3_master, false); + } } } diff --git a/net/core/filter.c b/net/core/filter.c index bc6828761a47c0..357d26b76c22d9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -218,24 +218,36 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; } +static int bpf_skb_load_helper_convert_offset(const struct sk_buff *skb, int offset) +{ + if (likely(offset >= 0)) + return offset; + + if (offset >= SKF_NET_OFF) + return offset - SKF_NET_OFF + skb_network_offset(skb); + + if (offset >= SKF_LL_OFF && skb_mac_header_was_set(skb)) + return offset - SKF_LL_OFF + skb_mac_offset(skb); + + return INT_MIN; +} + BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - u8 tmp, *ptr; + u8 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return *(u8 *)(data + offset); - if 
(!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return tmp; - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return *(u8 *)ptr; - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return *(u8 *)(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return tmp; + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, @@ -248,21 +260,19 @@ BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be16 tmp, *ptr; + __be16 tmp; const int len = sizeof(tmp); - if (offset >= 0) { - if (headlen - offset >= len) - return get_unaligned_be16(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be16_to_cpu(tmp); - } else { - ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return get_unaligned_be16(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be16(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be16_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, @@ -275,21 +285,19 @@ BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb, BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *, data, int, headlen, int, offset) { - __be32 tmp, *ptr; + __be32 tmp; const int len = sizeof(tmp); - if (likely(offset >= 0)) { - if (headlen - offset >= len) - return get_unaligned_be32(data + offset); - if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) - return be32_to_cpu(tmp); - } else { - ptr = 
bpf_internal_load_pointer_neg_helper(skb, offset, len); - if (likely(ptr)) - return get_unaligned_be32(ptr); - } + offset = bpf_skb_load_helper_convert_offset(skb, offset); + if (offset == INT_MIN) + return -EFAULT; - return -EFAULT; + if (headlen - offset >= len) + return get_unaligned_be32(data + offset); + if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp))) + return be32_to_cpu(tmp); + else + return -EFAULT; } BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, @@ -1960,10 +1968,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; + bool is_ipv6 = flags & BPF_F_IPV6; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | - BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; @@ -1979,7 +1988,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, if (unlikely(from != 0)) return -EINVAL; - inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); @@ -2501,6 +2510,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? diff --git a/net/core/link_watch.c b/net/core/link_watch.c index cb04ef2b9807c9..864f3bbc3a4c50 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -183,7 +183,7 @@ static void linkwatch_do_dev(struct net_device *dev) else dev_deactivate(dev); - netdev_state_change(dev); + netif_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). 
* This is the reason we use __dev_put() instead of dev_put(). @@ -240,7 +240,9 @@ static void __linkwatch_run_queue(int urgent_only) */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); + netdev_lock_ops(dev); linkwatch_do_dev(dev); + netdev_unlock_ops(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } @@ -253,25 +255,41 @@ static void __linkwatch_run_queue(int urgent_only) spin_unlock_irq(&lweventlist_lock); } -void linkwatch_sync_dev(struct net_device *dev) +static bool linkwatch_clean_dev(struct net_device *dev) { unsigned long flags; - int clean = 0; + bool clean = false; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); - clean = 1; + clean = true; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); - if (clean) + + return clean; +} + +void __linkwatch_sync_dev(struct net_device *dev) +{ + netdev_ops_assert_locked(dev); + + if (linkwatch_clean_dev(dev)) linkwatch_do_dev(dev); } +void linkwatch_sync_dev(struct net_device *dev) +{ + if (linkwatch_clean_dev(dev)) { + netdev_lock_ops(dev); + linkwatch_do_dev(dev); + netdev_unlock_ops(dev); + } +} /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c index b7f22dc92a6f30..941e26c1343de4 100644 --- a/net/core/lock_debug.c +++ b/net/core/lock_debug.c @@ -20,11 +20,11 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event, switch (cmd) { case NETDEV_REGISTER: case NETDEV_UP: + case NETDEV_CHANGE: netdev_ops_assert_locked(dev); fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: - case NETDEV_CHANGE: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index e39a459540ec02..60f27cb4e54f18 100644 --- 
a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -333,6 +333,8 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) struct dst_entry *dst; int ret; + local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -348,8 +350,10 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -364,11 +368,13 @@ int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_output); @@ -380,6 +386,8 @@ int lwtunnel_xmit(struct sk_buff *skb) struct dst_entry *dst; int ret; + local_bh_disable(); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); @@ -396,8 +404,10 @@ int lwtunnel_xmit(struct sk_buff *skb) lwtstate = dst->lwtstate; if (lwtstate->type == LWTUNNEL_ENCAP_NONE || - lwtstate->type > LWTUNNEL_ENCAP_MAX) - return 0; + lwtstate->type > LWTUNNEL_ENCAP_MAX) { + ret = 0; + goto out; + } ret = -EOPNOTSUPP; rcu_read_lock(); @@ -412,11 +422,13 @@ int lwtunnel_xmit(struct sk_buff *skb) if (ret == -EOPNOTSUPP) goto drop; - return ret; + goto out; drop: kfree_skb(skb); +out: + local_bh_enable(); return ret; } EXPORT_SYMBOL_GPL(lwtunnel_xmit); @@ -428,6 +440,8 @@ int lwtunnel_input(struct sk_buff *skb) struct dst_entry *dst; int ret; + DEBUG_NET_WARN_ON_ONCE(!in_softirq()); + if (dev_xmit_recursion()) { net_crit_ratelimited("%s(): recursion limit reached on datapath\n", __func__); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b0dfdf791ece5a..599f6a89ae581e 100644 --- a/net/core/net_namespace.c +++ 
b/net/core/net_namespace.c @@ -600,7 +600,7 @@ static void cleanup_net(struct work_struct *work) LIST_HEAD(net_exit_list); LIST_HEAD(dev_kill_list); - cleanup_net_task = current; + WRITE_ONCE(cleanup_net_task, current); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -676,7 +676,7 @@ static void cleanup_net(struct work_struct *work) put_user_ns(net->user_ns); net_passive_dec(net); } - cleanup_net_task = NULL; + WRITE_ONCE(cleanup_net_task, NULL); } /** diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5d7af50fe70272..a877693fecd65a 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -708,25 +708,66 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, return 0; } +/** + * netdev_stat_queue_sum() - add up queue stats from range of queues + * @netdev: net_device + * @rx_start: index of the first Rx queue to query + * @rx_end: index after the last Rx queue (first *not* to query) + * @rx_sum: output Rx stats, should be already initialized + * @tx_start: index of the first Tx queue to query + * @tx_end: index after the last Tx queue (first *not* to query) + * @tx_sum: output Tx stats, should be already initialized + * + * Add stats from [start, end) range of queue IDs to *x_sum structs. + * The sum structs must be already initialized. Usually this + * helper is invoked from the .get_base_stats callbacks of drivers + * to account for stats of disabled queues. In that case the ranges + * are usually [netdev->real_num_*x_queues, netdev->num_*x_queues). 
+ */ +void netdev_stat_queue_sum(struct net_device *netdev, + int rx_start, int rx_end, + struct netdev_queue_stats_rx *rx_sum, + int tx_start, int tx_end, + struct netdev_queue_stats_tx *tx_sum) +{ + const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx; + struct netdev_queue_stats_tx tx; + int i; + + ops = netdev->stat_ops; + + for (i = rx_start; i < rx_end; i++) { + memset(&rx, 0xff, sizeof(rx)); + if (ops->get_queue_stats_rx) + ops->get_queue_stats_rx(netdev, i, &rx); + netdev_nl_stats_add(rx_sum, &rx, sizeof(rx)); + } + for (i = tx_start; i < tx_end; i++) { + memset(&tx, 0xff, sizeof(tx)); + if (ops->get_queue_stats_tx) + ops->get_queue_stats_tx(netdev, i, &tx); + netdev_nl_stats_add(tx_sum, &tx, sizeof(tx)); + } +} +EXPORT_SYMBOL(netdev_stat_queue_sum); + static int netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { - struct netdev_queue_stats_rx rx_sum, rx; - struct netdev_queue_stats_tx tx_sum, tx; - const struct netdev_stat_ops *ops; + struct netdev_queue_stats_rx rx_sum; + struct netdev_queue_stats_tx tx_sum; void *hdr; - int i; - ops = netdev->stat_ops; /* Netdev can't guarantee any complete counters */ - if (!ops->get_base_stats) + if (!netdev->stat_ops->get_base_stats) return 0; memset(&rx_sum, 0xff, sizeof(rx_sum)); memset(&tx_sum, 0xff, sizeof(tx_sum)); - ops->get_base_stats(netdev, &rx_sum, &tx_sum); + netdev->stat_ops->get_base_stats(netdev, &rx_sum, &tx_sum); /* The op was there, but nothing reported, don't bother */ if (!memchr_inv(&rx_sum, 0xff, sizeof(rx_sum)) && @@ -739,18 +780,8 @@ netdev_nl_stats_by_netdev(struct net_device *netdev, struct sk_buff *rsp, if (nla_put_u32(rsp, NETDEV_A_QSTATS_IFINDEX, netdev->ifindex)) goto nla_put_failure; - for (i = 0; i < netdev->real_num_rx_queues; i++) { - memset(&rx, 0xff, sizeof(rx)); - if (ops->get_queue_stats_rx) - ops->get_queue_stats_rx(netdev, i, &rx); - netdev_nl_stats_add(&rx_sum, &rx, sizeof(rx)); - } - for (i = 0; i < 
netdev->real_num_tx_queues; i++) { - memset(&tx, 0xff, sizeof(tx)); - if (ops->get_queue_stats_tx) - ops->get_queue_stats_tx(netdev, i, &tx); - netdev_nl_stats_add(&tx_sum, &tx, sizeof(tx)); - } + netdev_stat_queue_sum(netdev, 0, netdev->real_num_rx_queues, &rx_sum, + 0, netdev->real_num_tx_queues, &tx_sum); if (netdev_nl_stats_write_rx(rsp, &rx_sum) || netdev_nl_stats_write_tx(rsp, &tx_sum)) @@ -861,14 +892,17 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) mutex_lock(&priv->lock); + err = 0; netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex); - if (!netdev || !netif_device_present(netdev)) { + if (!netdev) { err = -ENODEV; goto err_unlock_sock; } - - if (!netdev_need_ops_lock(netdev)) { + if (!netif_device_present(netdev)) + err = -ENODEV; + else if (!netdev_need_ops_lock(netdev)) err = -EOPNOTSUPP; + if (err) { NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_DEV_IFINDEX]); goto err_unlock; @@ -945,14 +979,25 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv) { struct net_devmem_dmabuf_binding *binding; struct net_devmem_dmabuf_binding *temp; + netdevice_tracker dev_tracker; struct net_device *dev; mutex_lock(&priv->lock); list_for_each_entry_safe(binding, temp, &priv->bindings, list) { + mutex_lock(&binding->lock); dev = binding->dev; + if (!dev) { + mutex_unlock(&binding->lock); + net_devmem_unbind_dmabuf(binding); + continue; + } + netdev_hold(dev, &dev_tracker, GFP_KERNEL); + mutex_unlock(&binding->lock); + netdev_lock(dev); net_devmem_unbind_dmabuf(binding); netdev_unlock(dev); + netdev_put(dev, &dev_tracker); } mutex_unlock(&priv->lock); } diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index 7eadb8393e002f..cd95394399b40c 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -5,7 +5,7 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) { - return __netmem_clear_lsb(netmem)->pp_magic; + return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } 
static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) @@ -15,9 +15,16 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) static inline void netmem_clear_pp_magic(netmem_ref netmem) { + WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK); + __netmem_clear_lsb(netmem)->pp_magic = 0; } +static inline bool netmem_is_pp(netmem_ref netmem) +{ + return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; +} + static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) { __netmem_clear_lsb(netmem)->pp = pool; @@ -28,4 +35,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem, { __netmem_clear_lsb(netmem)->dma_addr = dma_addr; } + +static inline unsigned long netmem_get_dma_index(netmem_ref netmem) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return 0; + + magic = __netmem_clear_lsb(netmem)->pp_magic; + + return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT; +} + +static inline void netmem_set_dma_index(netmem_ref netmem, + unsigned long id) +{ + unsigned long magic; + + if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) + return; + + magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT); + __netmem_clear_lsb(netmem)->pp_magic = magic; +} #endif diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 7745ad924ae2d8..2d9c51f480fb5f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -153,9 +153,9 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else -#define alloc_stat_inc(pool, __stat) -#define recycle_stat_inc(pool, __stat) -#define recycle_stat_add(pool, __stat, val) +#define alloc_stat_inc(...) do { } while (0) +#define recycle_stat_inc(...) do { } while (0) +#define recycle_stat_add(...) 
do { } while (0) #endif static bool page_pool_producer_lock(struct page_pool *pool) @@ -276,8 +276,7 @@ static int page_pool_init(struct page_pool *pool, /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); - if (pool->dma_map) - get_device(pool->p.dev); + xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { netdev_assert_locked(pool->slow.netdev); @@ -320,9 +319,7 @@ static int page_pool_init(struct page_pool *pool, static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); - - if (pool->dma_map) - put_device(pool->p.dev); + xa_destroy(&pool->dma_mapped); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) @@ -463,13 +460,21 @@ page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { - if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); + if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) { + rcu_read_lock(); + /* re-check under rcu_read_lock() to sync with page_pool_scrub() */ + if (pool->dma_sync) + __page_pool_dma_sync_for_device(pool, netmem, + dma_sync_size); + rcu_read_unlock(); + } } -static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp) { dma_addr_t dma; + int err; + u32 id; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit @@ -483,15 +488,30 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr_netmem(netmem, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) { + WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); goto unmap_failed; + } + + if (in_softirq()) + err = xa_alloc(&pool->dma_mapped, &id, 
netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + else + err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem), + PP_DMA_INDEX_LIMIT, gfp); + if (err) { + WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@"); + goto unset_failed; + } + netmem_set_dma_index(netmem, id); page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; +unset_failed: + page_pool_set_dma_addr_netmem(netmem, 0); unmap_failed: - WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); @@ -508,7 +528,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) { put_page(page); return NULL; } @@ -554,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, */ for (i = 0; i < nr_pages; i++) { netmem = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) { put_page(netmem_to_page(netmem)); continue; } @@ -656,6 +676,8 @@ void page_pool_clear_pp_info(netmem_ref netmem) static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, netmem_ref netmem) { + struct page *old, *page = netmem_to_page(netmem); + unsigned long id; dma_addr_t dma; if (!pool->dma_map) @@ -664,6 +686,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, */ return; + id = netmem_get_dma_index(netmem); + if (!id) + return; + + if (in_softirq()) + old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0); + else + old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0); + if (old != page) + return; + dma = 
page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ @@ -671,6 +704,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr_netmem(netmem, 0); + netmem_set_dma_index(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need @@ -707,19 +741,16 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { - int ret; - /* BH protection not needed if current is softirq */ - if (in_softirq()) - ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); - else - ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); + bool in_softirq, ret; - if (!ret) { + /* BH protection not needed if current is softirq */ + in_softirq = page_pool_producer_lock(pool); + ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem); + if (ret) recycle_stat_inc(pool, ring); - return true; - } + page_pool_producer_unlock(pool, in_softirq); - return false; + return ret; } /* Only allow direct recycling in special circumstances, into the @@ -1080,8 +1111,29 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) static void page_pool_scrub(struct page_pool *pool) { + unsigned long id; + void *ptr; + page_pool_empty_alloc_cache_once(pool); - pool->destroy_cnt++; + if (!pool->destroy_cnt++ && pool->dma_map) { + if (pool->dma_sync) { + /* Disable page_pool_dma_sync_for_device() */ + pool->dma_sync = false; + + /* Make sure all concurrent returns that may see the old + * value of dma_sync (and thus perform a sync) have + * finished before doing the unmapping below. Skip the + * wait if the device doesn't actually need syncing, or + * if there are no outstanding mapped pages. 
+ */ + if (dma_dev_need_sync(pool->p.dev) && + !xa_empty(&pool->dma_mapped)) + synchronize_net(); + } + + xa_for_each(&pool->dma_mapped, id, ptr) + __page_pool_release_page_dma(pool, page_to_netmem(ptr)); + } /* No more consumers should exist, but producers could still * be in-flight. @@ -1091,10 +1143,14 @@ static void page_pool_scrub(struct page_pool *pool) static int page_pool_release(struct page_pool *pool) { + bool in_softirq; int inflight; page_pool_scrub(pool); inflight = page_pool_inflight(pool, true); + /* Acquire producer lock to make sure producers have exited. */ + in_softirq = page_pool_producer_lock(pool); + page_pool_producer_unlock(pool, in_softirq); if (!inflight) __page_pool_destroy(pool); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c238528350506d..fc6815ad78266f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1043,7 +1043,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); -void netdev_set_operstate(struct net_device *dev, int newstate) +void netif_set_operstate(struct net_device *dev, int newstate) { unsigned int old = READ_ONCE(dev->operstate); @@ -1052,9 +1052,9 @@ void netdev_set_operstate(struct net_device *dev, int newstate) return; } while (!try_cmpxchg(&dev->operstate, &old, newstate)); - netdev_state_change(dev); + netif_state_change(dev); } -EXPORT_SYMBOL(netdev_set_operstate); +EXPORT_SYMBOL(netif_set_operstate); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -1080,7 +1080,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - netdev_set_operstate(dev, operstate); + netif_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) @@ -3027,7 +3027,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, err = validate_linkmsg(dev, tb, extack); if (err < 0) - goto errout; + return err; if 
(tb[IFLA_IFNAME]) nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); @@ -3396,7 +3396,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) - netdev_state_change(dev); + netif_state_change(dev); if (err < 0) net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", @@ -3681,7 +3681,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) - dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + netif_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); if (tb[IFLA_GSO_MAX_SIZE]) netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE])); if (tb[IFLA_GSO_MAX_SEGS]) diff --git a/net/core/selftests.c b/net/core/selftests.c index e99ae983fca9bb..35f807ea995235 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -100,10 +100,10 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, ehdr->h_proto = htons(ETH_P_IP); if (attr->tcp) { + memset(thdr, 0, sizeof(*thdr)); thdr->source = htons(attr->sport); thdr->dest = htons(attr->dport); thdr->doff = sizeof(struct tcphdr) / 4; - thdr->check = 0; } else { uhdr->source = htons(attr->sport); uhdr->dest = htons(attr->dport); @@ -144,10 +144,18 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev, attr->id = net_test_next_id; shdr->id = net_test_next_id++; - if (attr->size) - skb_put(skb, attr->size); - if (attr->max_size && attr->max_size > skb->len) - skb_put(skb, attr->max_size - skb->len); + if (attr->size) { + void *payload = skb_put(skb, attr->size); + + memset(payload, 0, attr->size); + } + + if (attr->max_size && attr->max_size > skb->len) { + size_t pad_len = attr->max_size - skb->len; + void *pad = skb_put(skb, pad_len); + + memset(pad, 0, pad_len); 
+ } skb->csum = 0; skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6cbf77bc61fce7..74a2d886a35b51 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -893,11 +893,6 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_netmem(netmem_ref netmem) -{ - return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE; -} - int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom) { @@ -995,14 +990,7 @@ bool napi_pp_put_page(netmem_ref netmem) { netmem = netmem_compound_head(netmem); - /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation - * in order to preserve any existing bits, such as bit 0 for the - * head page of compound page and bit 1 for pfmemalloc page, so - * mask those bits for freeing side when doing below checking, - * and page_is_pfmemalloc() is checked in __page_pool_put_page() - * to avoid recycling the pfmemalloc page. - */ - if (unlikely(!is_pp_netmem(netmem))) + if (unlikely(!netmem_is_pp(netmem))) return false; page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); @@ -1042,7 +1030,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) for (i = 0; i < shinfo->nr_frags; i++) { head_netmem = netmem_compound_head(shinfo->frags[i].netmem); - if (likely(is_pp_netmem(head_netmem))) + if (likely(netmem_is_pp(head_netmem))) page_pool_ref_netmem(head_netmem); else page_ref_inc(netmem_to_page(head_netmem)); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 0ddc4c7188332a..6d689918c2b390 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -530,16 +530,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, u32 off, u32 len, struct sk_psock *psock, struct sock *sk, - struct sk_msg *msg) + struct sk_msg *msg, + bool take_ref) { int num_sge, copied; + /* skb_to_sgvec will fail when the total number of fragments in + * frag_list and frags exceeds MAX_MSG_FRAGS. 
For example, the + * caller may aggregate multiple skbs. + */ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (num_sge < 0) { /* skb linearize may fail with ENOMEM, but lets simply try again * later if this happens. Under memory pressure we don't want to * drop the skb. We need to linearize the skb so that the mapping * in skb_to_sgvec can not error. + * Note that skb_linearize requires the skb not to be shared. */ if (skb_linearize(skb)) return -EAGAIN; @@ -556,7 +562,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; - msg->skb = skb; + msg->skb = take_ref ? skb_get(skb) : skb; sk_psock_queue_msg(psock, msg); sk_psock_data_ready(sk, psock); @@ -564,7 +570,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, } static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, - u32 off, u32 len); + u32 off, u32 len, bool take_ref); static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len) @@ -578,7 +584,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * correctly. */ if (unlikely(skb->sk == sk)) - return sk_psock_skb_ingress_self(psock, skb, off, len); + return sk_psock_skb_ingress_self(psock, skb, off, len, true); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; @@ -590,7 +596,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * into user buffers. */ skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true); if (err < 0) kfree(msg); return err; @@ -601,7 +607,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, * because the skb is already accounted for here. 
*/ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, - u32 off, u32 len) + u32 off, u32 len, bool take_ref) { struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC); struct sock *sk = psock->sk; @@ -610,7 +616,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb if (unlikely(!msg)) return -EAGAIN; skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref); if (err < 0) kfree(msg); return err; @@ -619,18 +625,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { - int err = 0; - if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } - skb_get(skb); - err = sk_psock_skb_ingress(psock, skb, off, len); - if (err < 0) - kfree_skb(skb); - return err; + + return sk_psock_skb_ingress(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -655,12 +656,14 @@ static void sk_psock_backlog(struct work_struct *work) bool ingress; int ret; + /* Increment the psock refcnt to synchronize with close(fd) path in + * sock_map_close(), ensuring we wait for backlog thread completion + * before sk_socket freed. If refcnt increment fails, it indicates + * sock_map_close() completed with sk_socket potentially already freed. 
+ */ + if (!sk_psock_get(psock->sk)) + return; mutex_lock(&psock->work_mutex); - if (unlikely(state->len)) { - len = state->len; - off = state->off; - } - while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; @@ -670,6 +673,13 @@ static void sk_psock_backlog(struct work_struct *work) off = stm->offset; len = stm->full_len; } + + /* Resume processing from previous partial state */ + if (unlikely(state->len)) { + len = state->len; + off = state->off; + } + ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { @@ -697,11 +707,14 @@ static void sk_psock_backlog(struct work_struct *work) len -= ret; } while (len); + /* The entire skb sent, clear state */ + sk_psock_skb_state(psock, state, 0, 0); skb = skb_dequeue(&psock->ingress_skb); kfree_skb(skb); } end: mutex_unlock(&psock->work_mutex); + sk_psock_put(psock->sk, psock); } struct sk_psock *sk_psock_init(struct sock *sk, int node) @@ -1014,7 +1027,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, off = stm->offset; len = stm->full_len; } - err = sk_psock_skb_ingress_self(psock, skb, off, len); + err = sk_psock_skb_ingress_self(psock, skb, off, len, false); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); diff --git a/net/core/sock.c b/net/core/sock.c index f67a3c5b09884b..5034d0fbd4a427 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2130,6 +2130,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname, */ static inline void sock_lock_init(struct sock *sk) { + sk_owner_clear(sk); + if (sk->sk_kern_sock) sock_lock_init_class_and_name( sk, @@ -2226,6 +2228,9 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) cgroup_sk_free(&sk->sk_cgrp_data); mem_cgroup_sk_free(sk); security_sk_free(sk); + + sk_owner_put(sk); + if (slab != NULL) kmem_cache_free(slab, sk); else @@ -3229,16 +3234,16 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? 
sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; - bool charged = false; + bool charged = true; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); if (memcg) { - if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge())) + charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()); + if (!charged) goto suppress_allocation; - charged = true; } /* Under limit. */ @@ -3323,7 +3328,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) sk_memory_allocated_sub(sk, amt); - if (charged) + if (memcg && charged) mem_cgroup_uncharge_skmem(memcg, amt); return 0; diff --git a/net/core/utils.c b/net/core/utils.c index 27f4cffaae05d9..b8c21a859e27b1 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, EXPORT_SYMBOL(inet_proto_csum_replace16); void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, - __wsum diff, bool pseudohdr) + __wsum diff, bool pseudohdr, bool ipv6) { if (skb->ip_summed != CHECKSUM_PARTIAL) { csum_replace_by_diff(sum, diff); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6) skb->csum = ~csum_sub(diff, skb->csum); } else if (pseudohdr) { *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); diff --git a/net/core/xdp.c b/net/core/xdp.c index f86eedad586a77..0ba73943c6eed8 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -437,8 +437,8 @@ void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, netmem = netmem_compound_head(netmem); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as mem->type knows this a page_pool page + /* No need to check netmem_is_pp() as mem->type knows this a + * page_pool page */ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, napi_direct); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c 
index e827775baf2ee1..436a7e1b412ade 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -862,6 +862,16 @@ static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) kfree(dst->lags); } +static void dsa_tree_teardown_routing_table(struct dsa_switch_tree *dst) +{ + struct dsa_link *dl, *next; + + list_for_each_entry_safe(dl, next, &dst->rtable, list) { + list_del(&dl->list); + kfree(dl); + } +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -879,7 +889,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) err = dsa_tree_setup_cpu_ports(dst); if (err) - return err; + goto teardown_rtable; err = dsa_tree_setup_switches(dst); if (err) @@ -911,14 +921,14 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); +teardown_rtable: + dsa_tree_teardown_routing_table(dst); return err; } static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_link *dl, *next; - if (!dst->setup) return; @@ -932,10 +942,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_cpu_ports(dst); - list_for_each_entry_safe(dl, next, &dst->rtable, list) { - list_del(&dl->list); - kfree(dl); - } + dsa_tree_teardown_routing_table(dst); pr_info("DSA: tree %d torn down\n", dst->index); @@ -1478,12 +1485,44 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { + struct dsa_mac_addr *a, *tmp; struct dsa_port *dp, *next; + struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { - WARN_ON(!list_empty(&dp->fdbs)); - WARN_ON(!list_empty(&dp->mdbs)); - WARN_ON(!list_empty(&dp->vlans)); + /* These are either entries that upper layers lost track of + * (probably due to bugs), or installed through interfaces + * where one does not necessarily have to remove them, like + * ndo_dflt_fdb_add(). 
+ */ + list_for_each_entry_safe(a, tmp, &dp->fdbs, list) { + dev_info(ds->dev, + "Cleaning up unicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + list_for_each_entry_safe(a, tmp, &dp->mdbs, list) { + dev_info(ds->dev, + "Cleaning up multicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + /* These are entries that upper layers have lost track of, + * probably due to bugs, but also due to dsa_port_do_vlan_del() + * having failed and the VLAN entry still lingering on. + */ + list_for_each_entry_safe(v, n, &dp->vlans, list) { + dev_info(ds->dev, + "Cleaning up vid %u from port %d\n", + v->vid, dp->index); + list_del(&v->list); + kfree(v); + } + list_del(&dp->list); kfree(dp); } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3ee53e28ec2e9f..53e03fd8071b4a 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -197,7 +197,7 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) err = ds->ops->tag_8021q_vlan_del(ds, port, vid); if (err) { - refcount_inc(&v->refcount); + refcount_set(&v->refcount, 1); return err; } diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 8c3c068728e51c..fe75821623a4fc 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, int source_port; u8 *brcm_tag; - if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID))) + if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index c33d4bf179297c..0b7564b53790da 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -140,7 +140,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { - u8 *tag = skb_tail_pointer(skb) - 
KSZ_EGRESS_TAG_LEN; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M, KSZ_EGRESS_TAG_LEN); @@ -311,10 +316,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { - /* Tag decoding */ - u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; - unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; unsigned int len = KSZ_EGRESS_TAG_LEN; + unsigned int port; + u8 *tag; + + if (skb_linearize(skb)) + return NULL; + + /* Tag decoding */ + tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; /* Extra 4-bytes PTP timestamp */ if (tag[0] & KSZ9477_PTP_TAG_INDICATION) { diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 1e790413db0e8d..4a9a946cabf05d 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -101,7 +101,6 @@ struct ethtool_cmis_cdb_rpl { }; u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs); -u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs); void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index d159dc121bde58..3057576bc81e3d 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -16,15 +16,6 @@ u32 ethtool_cmis_get_max_lpl_size(u8 num_of_byte_octs) return 8 * (1 + min_t(u8, num_of_byte_octs, 15)); } -/* For accessing the EPL field on page 9Fh, the allowable length extension is - * min(i, 255) byte octets where i specifies the allowable additional number of - * byte octets in a READ or a WRITE. 
- */ -u32 ethtool_cmis_get_max_epl_size(u8 num_of_byte_octs) -{ - return 8 * (1 + min_t(u8, num_of_byte_octs, 255)); -} - void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, enum ethtool_cmis_cdb_cmd_id cmd, u8 *lpl, u8 lpl_len, u8 *epl, u16 epl_len, @@ -33,19 +24,16 @@ void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, { args->req.id = cpu_to_be16(cmd); args->req.lpl_len = lpl_len; - if (lpl) { + if (lpl) memcpy(args->req.payload, lpl, args->req.lpl_len); - args->read_write_len_ext = - ethtool_cmis_get_max_lpl_size(read_write_len_ext); - } if (epl) { args->req.epl_len = cpu_to_be16(epl_len); args->req.epl = epl; - args->read_write_len_ext = - ethtool_cmis_get_max_epl_size(read_write_len_ext); } args->max_duration = max_duration; + args->read_write_len_ext = + ethtool_cmis_get_max_lpl_size(read_write_len_ext); args->msleep_pre_rpl = msleep_pre_rpl; args->rpl_exp_len = rpl_exp_len; args->flags = flags; @@ -363,7 +351,7 @@ ethtool_cmis_module_poll(struct net_device *dev, struct netlink_ext_ack extack = {}; int err; - ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl)); + ethtool_cmis_page_init(&page_data, 0, offset, sizeof(*rpl)); page_data.data = (u8 *)rpl; err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0cb6da1f692a0a..49bea6b45bd5c1 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -830,6 +830,7 @@ void ethtool_ringparam_get_cfg(struct net_device *dev, /* Driver gives us current state, we want to return current config */ kparam->tcp_data_split = dev->cfg->hds_config; + kparam->hds_thresh = dev->cfg->hds_thresh; } static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 221639407c7245..4b1badeebc741c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -60,7 +60,7 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev) u32 
ethtool_op_get_link(struct net_device *dev) { /* Synchronize carrier state with link watch, see also rtnl_getlink() */ - linkwatch_sync_dev(dev); + __linkwatch_sync_dev(dev); return netif_carrier_ok(dev) ? 1 : 0; } @@ -1001,7 +1001,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + if (info.rss_context && + !xa_load(&dev->ethtool->rss_ctx, info.rss_context)) return -EINVAL; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index a163d40c6431b9..977beeaaa2f991 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -500,7 +500,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) netdev_unlock_ops(req_info->dev); rtnl_unlock(); if (ret < 0) - goto err_cleanup; + goto err_dev; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; @@ -560,7 +560,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, netdev_unlock_ops(dev); rtnl_unlock(); if (ret < 0) - goto out; + goto out_cancel; ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr); if (ret < 0) goto out; @@ -569,6 +569,7 @@ static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev, out: if (ctx->ops->cleanup_data) ctx->ops->cleanup_data(ctx->reply_data); +out_cancel: ctx->reply_data->dev = NULL; if (ret < 0) genlmsg_cancel(skb, ehdr); @@ -793,7 +794,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, ethnl_init_reply_data(reply_data, ops, dev); ret = ops->prepare_data(req_info, reply_data, &info); if (ret < 0) - goto err_cleanup; + goto err_rep; ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; @@ -828,6 +829,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); +err_rep: kfree(reply_data); 
kfree(req_info); return; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 439cfb7ad5d148..1b1b700ec05eac 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -33,14 +33,14 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) struct net_device *dev = master->dev; if (!is_admin_up(dev)) { - netdev_set_operstate(dev, IF_OPER_DOWN); + netif_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) - netdev_set_operstate(dev, IF_OPER_UP); + netif_set_operstate(dev, IF_OPER_UP); else - netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); + netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0e4076866c0a40..f14a41ee4aa101 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = inet_lookup_established(net, 
net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, @@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -199,8 +154,10 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -209,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -392,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dd5cf8914a28d1..b216088a6abd30 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -632,7 +632,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. 
*/ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a8b04d4abcaae8..85dc208f32e997 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -120,11 +120,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv4.mr_tables)) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -317,11 +312,6 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) -static bool ipmr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -437,7 +427,7 @@ static void ipmr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ipmr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 9082ca17e845cb..7e7c49535e3f56 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -50,7 +50,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, else addr = iph->saddr; - *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); + if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) { + *dst = inet_dev_addr_type(nft_net(pkt), dev, addr); + return; + } + + *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr); } EXPORT_SYMBOL_GPL(nft_fib4_eval_type); @@ -65,8 +70,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, struct 
flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, + .flowi4_proto = pkt->tprot, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), - .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; const struct net_device *oif; const struct net_device *found; @@ -90,6 +95,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; + fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); if (!iph) { regs->verdict.code = NFT_BREAK; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 934f777f29d36c..d293087b426df7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -439,7 +439,7 @@ static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); } INDIRECT_CALLABLE_SCOPE diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 2c0725583be39f..9b295b2878befa 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -247,6 +247,62 @@ static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) return segs; } +static void __udpv6_gso_segment_csum(struct sk_buff *seg, + struct in6_addr *oldip, + const struct in6_addr *newip, + __be16 *oldport, __be16 newport) +{ + struct udphdr *uh = udp_hdr(seg); + + if (ipv6_addr_equal(oldip, newip) && *oldport == newport) + return; + + if (uh->check) { + inet_proto_csum_replace16(&uh->check, seg, oldip->s6_addr32, + newip->s6_addr32, true); + + inet_proto_csum_replace2(&uh->check, seg, *oldport, newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + + *oldip = *newip; + *oldport = newport; +} + +static struct sk_buff *__udpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct ipv6hdr *iph; + const struct udphdr *uh; + struct ipv6hdr *iph2; + struct sk_buff *seg; + struct udphdr *uh2; + + seg = segs; + uh = 
udp_hdr(seg); + iph = ipv6_hdr(seg); + uh2 = udp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&uh->source ^ *(const u32 *)&uh2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ipv6_hdr(seg); + + __udpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr, + &uh2->source, uh->source); + __udpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr, + &uh2->dest, uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) @@ -259,7 +315,10 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); + if (is_ipv6) + return __udpv6_gso_segment_list_csum(skb); + else + return __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, @@ -273,6 +332,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, bool copy_dtor; __sum16 check; __be16 newlen; + int ret = 0; mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -301,6 +361,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) return __udp_gso_segment_list(gso_skb, features, is_ipv6); + ret = __skb_linearize(gso_skb); + if (ret) + return ERR_PTR(ret); + /* Setup csum, as fraglist skips this in udp4_gro_receive. 
*/ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; gso_skb->csum_offset = offsetof(struct udphdr, check); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index b5b06323cfd94a..0d31a8c108d4f6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -182,11 +182,15 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; - - offset = offset - sizeof(struct udphdr); + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm4_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2cffb8f4a2bc80..c6b22170dc4928 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3154,12 +3154,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - netdev_lock_ops(dev); - if (dev) + if (dev) { + netdev_lock_ops(dev); err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); - else + netdev_unlock_ops(dev); + } else { err = -ENODEV; - 
netdev_unlock_ops(dev); + } rtnl_net_unlock(net); return err; } @@ -3213,16 +3214,13 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen, offset = 0; + int scope, plen; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ - if (idev->dev->addr_len == sizeof(struct in6_addr)) - offset = sizeof(struct in6_addr) - 4; - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3533,7 +3531,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - if (dev->type == ARPHRD_ETHER) { + /* Generate the IPv6 link-local address using addrconf_addr_gen(), + * unless we have an IPv4 GRE device not bound to an IP address and + * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this + * case). Such devices fall back to add_v4_addrs() instead. 
+ */ + if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9e73944e3b530a..72adfc107b557d 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb) } #ifdef CONFIG_INET6_ESPINTCP -struct esp_tcp_sk { - struct sock *sk; - struct rcu_head rcu; -}; - -static void esp_free_tcp_sk(struct rcu_head *head) -{ - struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu); - - sock_put(esk->sk); - kfree(esk); -} - static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; struct net *net = xs_net(x); - struct esp_tcp_sk *esk; __be16 sport, dport; - struct sock *nsk; struct sock *sk; - sk = rcu_dereference(x->encap_sk); - if (sk && sk->sk_state == TCP_ESTABLISHED) - return sk; - spin_lock_bh(&x->lock); sport = encap->encap_sport; dport = encap->encap_dport; - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (sk && sk == nsk) { - esk = kmalloc(sizeof(*esk), GFP_ATOMIC); - if (!esk) { - spin_unlock_bh(&x->lock); - return ERR_PTR(-ENOMEM); - } - RCU_INIT_POINTER(x->encap_sk, NULL); - esk->sk = sk; - call_rcu(&esk->rcu, esp_free_tcp_sk); - } spin_unlock_bh(&x->lock); sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, @@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) return ERR_PTR(-EINVAL); } - spin_lock_bh(&x->lock); - nsk = rcu_dereference_protected(x->encap_sk, - lockdep_is_held(&x->lock)); - if (encap->encap_sport != sport || - encap->encap_dport != dport) { - sock_put(sk); - sk = nsk ?: ERR_PTR(-EREMCHG); - } else if (sk == nsk) { - sock_put(sk); - } else { - rcu_assign_pointer(x->encap_sk, sk); - } - spin_unlock_bh(&x->lock); - return sk; } @@ -216,8 +171,10 @@ static 
int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) sk = esp6_find_tcp_sk(x); err = PTR_ERR_OR_ZERO(sk); - if (err) + if (err) { + kfree_skb(skb); goto out; + } bh_lock_sock(sk); if (sock_owned_by_user(sk)) @@ -226,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb) err = espintcp_push_skb(sk, skb); bh_unlock_sock(sk); + sock_put(sk); + out: rcu_read_unlock(); return err; @@ -422,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x, if (IS_ERR(sk)) return ERR_CAST(sk); + sock_put(sk); + *lenp = htons(len); esph = (struct ip_esp_hdr *)(lenp + 1); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 95e9146918cc6f..b8d43ed4689db9 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&th->check, skb, - diff, true); + diff, true, true); } break; case NEXTHDR_UDP: @@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&uh->check, skb, - diff, true); + diff, true, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } @@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, - diff, true); + diff, true, true); } break; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b413c9c8a21c6f..3276cde5ebd704 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -108,11 +108,6 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv6.mr6_tables)) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net) || !net_initialized(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table 
*mrt) { @@ -306,11 +301,6 @@ EXPORT_SYMBOL(ip6mr_rule_default); #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) -static bool ip6mr_can_free_table(struct net *net) -{ - return !check_net(net); -} - static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -416,7 +406,7 @@ static void ip6mr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - WARN_ON_ONCE(!ip6mr_can_free_table(net)); + WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 581ce055bf520f..4541836ee3da20 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -164,20 +164,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct ip6_fraglist_iter iter; struct sk_buff *frag2; - if (first_len - hlen > mtu || - skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) + if (first_len - hlen > mtu) goto blackhole; - if (skb_cloned(skb)) + if (skb_cloned(skb) || + skb_headroom(skb) < (hroom + sizeof(struct frag_hdr))) goto slow_path; skb_walk_frags(skb, frag2) { - if (frag2->len > mtu || - skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) + if (frag2->len > mtu) goto blackhole; /* Partially cloned skb? 
*/ - if (skb_shared(frag2)) + if (skb_shared(frag2) || + skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr))) goto slow_path; } diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 7fd9d7b21cd42d..421036a3605b46 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -50,6 +50,7 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->flowi6_mark = pkt->skb->mark; fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; + fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev); return lookup_flags; } @@ -73,8 +74,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); - nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) @@ -158,6 +157,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, { const struct nft_fib *priv = nft_expr_priv(expr); int noff = skb_network_offset(pkt->skb); + const struct net_device *found = NULL; const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; struct ipv6hdr *iph, _iph; @@ -165,7 +165,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), - .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; struct rt6_info *rt; int lookup_flags; @@ -203,11 +202,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev && - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) - goto put_rt_err; + if (!oif) { + found = rt->rt6i_idev->dev; + } else { + if (oif == rt->rt6i_idev->dev || + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == 
oif->ifindex) + found = oif; + } - nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); + nft_fib_store_result(dest, priv, found); put_rt_err: ip6_rt_put(rt); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ab12b816ab9485..96f1621e2381c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -470,10 +470,10 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, goto out; hash = fl6->mp_hash; - if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && - rt6_score_route(first->fib6_nh, first->fib6_flags, oif, - strict) >= 0) { - match = first; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) { + if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) + match = first; goto out; } @@ -1771,6 +1771,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, if (!err) { spin_lock_bh(&f6i->fib6_table->tb6_lock); fib6_update_sernum(net, f6i); + fib6_add_gc_list(f6i); spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ac1dbd492c22dc..a11a02b4ba95b6 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1644,10 +1644,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 }, - [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, - .len = sizeof(struct in_addr) }, - [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, - .len = sizeof(struct in6_addr) }, + [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)), + [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d9b11fe41bf0c9..a8a04f441e7888 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 
@@ static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, iif, sdif); NAPI_GRO_CB(skb)->is_flist = !sk; if (sk) - sock_put(sk); + sock_gen_put(sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4abc5e9d63227a..841c81abaaf4ff 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -179,14 +179,18 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, int offset = skb_gro_offset(skb); const struct net_offload *ops; struct sk_buff *pp = NULL; - int ret; + int len, dlen; + __u8 *udpdata; + __be32 *udpdata32; if (skb->protocol == htons(ETH_P_IP)) return xfrm4_gro_udp_encap_rcv(sk, head, skb); - offset = offset - sizeof(struct udphdr); - - if (!pskb_pull(skb, offset)) + len = skb->len - offset; + dlen = offset + min(len, 8); + udpdata = skb_gro_header(skb, dlen, offset); + udpdata32 = (__be32 *)udpdata; + if (unlikely(!udpdata)) return NULL; rcu_read_lock(); @@ -194,11 +198,10 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (!ops || !ops->callbacks.gro_receive) goto out; - ret = __xfrm6_udp_encap_rcv(sk, skb, false); - if (ret) + /* check if it is a keepalive or IKE packet */ + if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; - skb_push(skb, offset); NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); @@ -208,7 +211,6 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, out: rcu_read_unlock(); - skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ca10916340b098..5432a5f2dfc8a0 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -277,8 +277,10 @@ void l3mdev_update_flow(struct net *net, struct flowi *fl) if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { - if (!fl->flowi_l3mdev) + 
if (!fl->flowi_l3mdev) { fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF; + } /* oif set to L3mdev directs lookup to its table; * reset to avoid oif match in fib_lookup diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 0259cde394ba09..cc77ec5769d828 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -887,15 +887,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (sk->sk_type != SOCK_STREAM) goto copy_uaddr; + /* Partial read */ + if (used + offset < skb_len) + continue; + if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); *seq = 0; } - - /* Partial read */ - if (used + offset < skb_len) - continue; } while (len > 0); out: diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f0f4a250b10e01..969b3e2c496af5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -659,6 +659,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ieee80211_txq_remove_vlan(local, sdata); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + sdata->bss = NULL; if (local->open_count == 0) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 741e6c7edcb7c7..6b6de43d9420ac 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1354,10 +1354,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) hw->wiphy->software_iftypes |= BIT(NL80211_IFTYPE_MONITOR); - local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + - sizeof(void *) * channels, GFP_KERNEL); + local->int_scan_req = kzalloc(struct_size(local->int_scan_req, + channels, channels), + GFP_KERNEL); if (!local->int_scan_req) return -ENOMEM; + local->int_scan_req->n_channels = channels; eth_broadcast_addr(local->int_scan_req->bssid); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d1f2d6d09ad02..53d5ffad87be87 100644 --- a/net/mac80211/mlme.c +++ 
b/net/mac80211/mlme.c @@ -7220,11 +7220,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { struct ieee80211_ext *ext = (void *) mgmt; - - if (ieee80211_is_s1g_short_beacon(ext->frame_control)) - variable = ext->u.s1g_short_beacon.variable; - else - variable = ext->u.s1g_beacon.variable; + variable = ext->u.s1g_beacon.variable + + ieee80211_s1g_optional_len(ext->frame_control); } baselen = (u8 *) variable - (u8 *) mgmt; @@ -7675,6 +7672,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7697,19 +7695,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7875,7 +7872,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. 
*/ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index cb707907188585..5a56487dab69cb 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -260,6 +260,7 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + struct ieee80211_ext *ext; size_t min_hdr_len = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); @@ -269,12 +270,10 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) return; if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_short_beacon.variable); - else - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_beacon); + ext = (struct ieee80211_ext *)mgmt; + min_hdr_len = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + ieee80211_s1g_optional_len(ext->frame_control); } if (skb->len < min_hdr_len) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b17b3cc7fb903d..a362254b310cd5 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -1085,7 +1085,13 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp); - if (status->free_list) + /* + * This is a bit racy but we can avoid a lot of work + * with this test... 
+ */ + if (local->tx_mntrs) + ieee80211_tx_monitor(local, skb, retry_count, status); + else if (status->free_list) list_add_tail(&skb->list, status->free_list); else dev_kfree_skb(skb); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index dd895617defd37..9b12ca97f41282 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -630,6 +630,9 @@ static int mctp_sk_hash(struct sock *sk) { struct net *net = sock_net(sk); + /* Bind lookup runs under RCU, remain live during that. */ + sock_set_flag(sk, SOCK_RCU_FREE); + mutex_lock(&net->mctp.bind_lock); sk_add_node_rcu(sk, &net->mctp.binds); mutex_unlock(&net->mctp.bind_lock); diff --git a/net/mctp/device.c b/net/mctp/device.c index 8e0724c56723de..7c0dcf3df31962 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -117,11 +117,18 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct ifaddrmsg *hdr; struct mctp_dev *mdev; - int ifindex, rc; - - hdr = nlmsg_data(cb->nlh); - // filter by ifindex if requested - ifindex = hdr->ifa_index; + int ifindex = 0, rc; + + /* Filter by ifindex if a header is provided */ + if (cb->nlh->nlmsg_len >= nlmsg_msg_size(sizeof(*hdr))) { + hdr = nlmsg_data(cb->nlh); + ifindex = hdr->ifa_index; + } else { + if (cb->strict_check) { + NL_SET_ERR_MSG(cb->extack, "mctp: Invalid header for addr dump request"); + return -EINVAL; + } + } rcu_read_lock(); for_each_netdev_dump(net, dev, mcb->ifindex) { diff --git a/net/mctp/route.c b/net/mctp/route.c index 4c460160914f01..d9c8e5a5f9ce9a 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -313,8 +313,10 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) key = flow->key; - if (WARN_ON(key->dev && key->dev != dev)) + if (key->dev) { + WARN_ON(key->dev != dev); return; + } mctp_dev_set_key(dev, key); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2cb62f026b1f44..a715dcbe0146ed 100644 --- a/net/mptcp/pm_userspace.c +++ 
b/net/mptcp/pm_userspace.c @@ -337,7 +337,11 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) release_sock(sk); - sock_kfree_s(sk, match, sizeof(*match)); + kfree_rcu_mightsleep(match); + /* Adjust sk_omem_alloc like sock_kfree_s() does, to match + * with allocation of this memory by sock_kmemdup() + */ + atomic_sub(sizeof(*match), &sk->sk_omem_alloc); err = 0; out: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 409bd415ef1d19..24c2de1891bdf3 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -899,13 +899,17 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto dispose_child; } - if (!subflow_hmac_valid(req, &mp_opt) || - !mptcp_can_accept_new_subflow(subflow_req->msk)) { + if (!subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } + if (!mptcp_can_accept_new_subflow(owner)) { + subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); + goto dispose_child; + } + /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 4e0842df5234ea..2c260f33b55cc5 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter { }; struct ncsi_channel_stats { - u32 hnc_cnt_hi; /* Counter cleared */ - u32 hnc_cnt_lo; /* Counter cleared */ - u32 hnc_rx_bytes; /* Rx bytes */ - u32 hnc_tx_bytes; /* Tx bytes */ - u32 hnc_rx_uc_pkts; /* Rx UC packets */ - u32 hnc_rx_mc_pkts; /* Rx MC packets */ - u32 hnc_rx_bc_pkts; /* Rx BC packets */ - u32 hnc_tx_uc_pkts; /* Tx UC packets */ - u32 hnc_tx_mc_pkts; /* Tx MC packets */ - u32 hnc_tx_bc_pkts; /* Tx BC packets */ + u64 hnc_cnt; /* Counter cleared */ + u64 hnc_rx_bytes; /* Rx bytes */ + u64 hnc_tx_bytes; /* Tx bytes */ + u64 hnc_rx_uc_pkts; /* Rx UC packets */ + u64 hnc_rx_mc_pkts; /* Rx MC packets */ + u64 
hnc_rx_bc_pkts; /* Rx BC packets */ + u64 hnc_tx_uc_pkts; /* Tx UC packets */ + u64 hnc_tx_mc_pkts; /* Tx MC packets */ + u64 hnc_tx_bc_pkts; /* Tx BC packets */ u32 hnc_fcs_err; /* FCS errors */ u32 hnc_align_err; /* Alignment errors */ u32 hnc_false_carrier; /* False carrier detection */ @@ -181,7 +180,7 @@ struct ncsi_channel_stats { u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */ u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */ u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */ - u32 hnc_rx_valid_bytes; /* Rx valid bytes */ + u64 hnc_rx_valid_bytes; /* Rx valid bytes */ u32 hnc_rx_runt_pkts; /* Rx error runt packets */ u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */ u32 ncsi_rx_cmds; /* Rx NCSI commands */ diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index f2f3b5c1b94126..24edb273797240 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt { /* Get Controller Packet Statistics */ struct ncsi_rsp_gcps_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 cnt_hi; /* Counter cleared */ - __be32 cnt_lo; /* Counter cleared */ - __be32 rx_bytes; /* Rx bytes */ - __be32 tx_bytes; /* Tx bytes */ - __be32 rx_uc_pkts; /* Rx UC packets */ - __be32 rx_mc_pkts; /* Rx MC packets */ - __be32 rx_bc_pkts; /* Rx BC packets */ - __be32 tx_uc_pkts; /* Tx UC packets */ - __be32 tx_mc_pkts; /* Tx MC packets */ - __be32 tx_bc_pkts; /* Tx BC packets */ + __be64 cnt; /* Counter cleared */ + __be64 rx_bytes; /* Rx bytes */ + __be64 tx_bytes; /* Tx bytes */ + __be64 rx_uc_pkts; /* Rx UC packets */ + __be64 rx_mc_pkts; /* Rx MC packets */ + __be64 rx_bc_pkts; /* Rx BC packets */ + __be64 tx_uc_pkts; /* Tx UC packets */ + __be64 tx_mc_pkts; /* Tx MC packets */ + __be64 tx_bc_pkts; /* Tx BC packets */ __be32 fcs_err; /* FCS errors */ __be32 align_err; /* Alignment errors */ __be32 false_carrier; /* False carrier detection */ @@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt { __be32 tx_1023_frames; 
/* Tx 512-1023 bytes frames */ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ - __be32 rx_valid_bytes; /* Rx valid bytes */ + __be64 rx_valid_bytes; /* Rx valid bytes */ __be32 rx_runt_pkts; /* Rx error runt packets */ __be32 rx_jabber_pkts; /* Rx error jabber packets */ __be32 checksum; /* Checksum */ -}; +} __packed __aligned(4); /* Get NCSI Statistics */ struct ncsi_rsp_gns_pkt { diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 4a8ce2949faeac..8668888c5a2f99 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -926,16 +926,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) /* Update HNC's statistics */ ncs = &nc->stats; - ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi); - ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo); - ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes); - ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes); - ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts); - ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts); - ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts); - ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts); - ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts); - ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts); + ncs->hnc_cnt = be64_to_cpu(rsp->cnt); + ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes); + ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes); + ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts); + ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts); + ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts); + ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts); + ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts); + ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts); ncs->hnc_fcs_err = ntohl(rsp->fcs_err); ncs->hnc_align_err = ntohl(rsp->align_err); ncs->hnc_false_carrier = ntohl(rsp->false_carrier); @@ -964,7 +963,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames); ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames); ncs->hnc_tx_9022_frames = 
ntohl(rsp->tx_9022_frames); - ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes); + ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes); ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts); ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index df2dc21304efbe..047ba81865edff 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -212,7 +212,7 @@ config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default y - select LIBCRC32C + select CRC32 help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -475,7 +475,7 @@ endif # NF_CONNTRACK config NF_TABLES select NETFILTER_NETLINK - select LIBCRC32C + select CRC32 tristate "Netfilter nf_tables support" help nftables is the new packet classification framework that intends to diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cf3ce72c3de645..5251524b96afac 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -64,7 +64,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 2a3017b9c001b3..8c5b1fe12d0782 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -105,7 +105,7 @@ config IP_VS_PROTO_AH config IP_VS_PROTO_SCTP bool "SCTP load balancing support" - select LIBCRC32C + select CRC32 help This option enables support for load balancing SCTP transport protocol. Say Y if unsure. 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3313bceb6cc99d..014f077403695f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) 
? 1 : 0; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 9d8361526f82ac..9441ac3d8c1a2e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -383,8 +383,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - set_bit(NF_FLOW_TEARDOWN, &flow->flags); - flow_offload_fixup_ct(flow); + if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) + flow_offload_fixup_ct(flow); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -558,10 +558,12 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_custom_gc(flow_table, flow)) + nf_flow_custom_gc(flow_table, flow)) { flow_offload_teardown(flow); - else if (!teardown) + teardown = true; + } else if (!teardown) { nf_flow_table_extend_ct_timeout(flow->ct); + } if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index aad84aabd7f1d1..f391cd267922b3 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -248,7 +248,7 @@ static noinline bool nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_ct) { - static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT; + static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST; const struct nf_conntrack_tuple_hash *thash; const struct nf_conntrack_zone *zone; struct nf_conn *ct; @@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple, zone = nf_ct_zone(ignored_ct); thash = nf_conntrack_find_get(net, zone, tuple); - if (unlikely(!thash)) /* clashing entry went away */ - return false; + if (unlikely(!thash)) { + struct nf_conntrack_tuple reply; + + nf_ct_invert_tuple(&reply, tuple); + thash = nf_conntrack_find_get(net, zone, 
&reply); + if (!thash) /* clashing entry went away */ + return false; + } ct = nf_ct_tuplehash_to_ctrack(thash); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 9b2d7463d3d326..df0798da2329b9 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -19,10 +19,16 @@ struct nft_quota { }; static inline bool nft_overquota(struct nft_quota *priv, - const struct sk_buff *skb) + const struct sk_buff *skb, + bool *report) { - return atomic64_add_return(skb->len, priv->consumed) >= - atomic64_read(&priv->quota); + u64 consumed = atomic64_add_return(skb->len, priv->consumed); + u64 quota = atomic64_read(&priv->quota); + + if (report) + *report = consumed >= quota; + + return consumed > quota; } static inline bool nft_quota_invert(struct nft_quota *priv) @@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) + if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } @@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj, const struct nft_pktinfo *pkt) { struct nft_quota *priv = nft_obj_data(obj); - bool overquota; + bool overquota, report; - overquota = nft_overquota(priv, pkt->skb); + overquota = nft_overquota(priv, pkt->skb, &report); if (overquota ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; - if (overquota && + if (report && !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0, NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7be342b495f5f7..0529e4ef752070 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -683,6 +683,30 @@ static int pipapo_realloc_mt(struct nft_pipapo_field *f, return 0; } + +/** + * lt_calculate_size() - Get storage size for 
lookup table with overflow check + * @groups: Amount of bit groups + * @bb: Number of bits grouped together in lookup table buckets + * @bsize: Size of each bucket in lookup table, in longs + * + * Return: allocation size including alignment overhead, negative on overflow + */ +static ssize_t lt_calculate_size(unsigned int groups, unsigned int bb, + unsigned int bsize) +{ + ssize_t ret = groups * NFT_PIPAPO_BUCKETS(bb) * sizeof(long); + + if (check_mul_overflow(ret, bsize, &ret)) + return -1; + if (check_add_overflow(ret, NFT_PIPAPO_ALIGN_HEADROOM, &ret)) + return -1; + if (ret > INT_MAX) + return -1; + + return ret; +} + /** * pipapo_resize() - Resize lookup or mapping table, or both * @f: Field containing lookup and mapping tables @@ -701,6 +725,7 @@ static int pipapo_resize(struct nft_pipapo_field *f, long *new_lt = NULL, *new_p, *old_lt = f->lt, *old_p; unsigned int new_bucket_size, copy; int group, bucket, err; + ssize_t lt_size; if (rules >= NFT_PIPAPO_RULE0_MAX) return -ENOSPC; @@ -719,10 +744,11 @@ static int pipapo_resize(struct nft_pipapo_field *f, else copy = new_bucket_size; - new_lt = kvzalloc(f->groups * NFT_PIPAPO_BUCKETS(f->bb) * - new_bucket_size * sizeof(*new_lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL); + lt_size = lt_calculate_size(f->groups, f->bb, new_bucket_size); + if (lt_size < 0) + return -ENOMEM; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return -ENOMEM; @@ -907,7 +933,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) { unsigned int groups, bb; unsigned long *new_lt; - size_t lt_size; + ssize_t lt_size; lt_size = f->groups * NFT_PIPAPO_BUCKETS(f->bb) * f->bsize * sizeof(*f->lt); @@ -917,15 +943,17 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) groups = f->groups * 2; bb = NFT_PIPAPO_GROUP_BITS_LARGE_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; } else if (f->bb == 
NFT_PIPAPO_GROUP_BITS_LARGE_SET && lt_size < NFT_PIPAPO_LT_SIZE_LOW) { groups = f->groups / 2; bb = NFT_PIPAPO_GROUP_BITS_SMALL_SET; - lt_size = groups * NFT_PIPAPO_BUCKETS(bb) * f->bsize * - sizeof(*f->lt); + lt_size = lt_calculate_size(groups, bb, f->bsize); + if (lt_size < 0) + return; /* Don't increase group width if the resulting lookup table size * would exceed the upper size threshold for a "small" set. @@ -936,7 +964,7 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) return; } - new_lt = kvzalloc(lt_size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL_ACCOUNT); + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) return; @@ -1451,13 +1479,15 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) for (i = 0; i < old->field_count; i++) { unsigned long *new_lt; + ssize_t lt_size; memcpy(dst, src, offsetof(struct nft_pipapo_field, lt)); - new_lt = kvzalloc(src->groups * NFT_PIPAPO_BUCKETS(src->bb) * - src->bsize * sizeof(*dst->lt) + - NFT_PIPAPO_ALIGN_HEADROOM, - GFP_KERNEL_ACCOUNT); + lt_size = lt_calculate_size(src->groups, src->bb, src->bsize); + if (lt_size < 0) + goto out_lt; + + new_lt = kvzalloc(lt_size, GFP_KERNEL_ACCOUNT); if (!new_lt) goto out_lt; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index b8d3c3213efee5..be7c16c79f711e 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -994,8 +994,9 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(5, lt, 8, pkt[8], bsize); NFT_PIPAPO_AVX2_AND(6, 2, 3); + NFT_PIPAPO_AVX2_AND(3, 4, 7); NFT_PIPAPO_AVX2_BUCKET_LOAD8(7, lt, 9, pkt[9], bsize); - NFT_PIPAPO_AVX2_AND(0, 4, 5); + NFT_PIPAPO_AVX2_AND(0, 3, 5); NFT_PIPAPO_AVX2_BUCKET_LOAD8(1, lt, 10, pkt[10], bsize); NFT_PIPAPO_AVX2_AND(2, 6, 7); NFT_PIPAPO_AVX2_BUCKET_LOAD8(3, lt, 11, pkt[11], bsize); @@ -1112,6 +1113,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc 
*desc, u32 features, return true; } +/** + * pipapo_resmap_init_avx2() - Initialise result map before first use + * @m: Matching data, including mapping table + * @res_map: Result map + * + * Like pipapo_resmap_init() but do not set start map bits covered by the first field. + */ +static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map) +{ + const struct nft_pipapo_field *f = m->f; + int i; + + /* Starting map doesn't need to be set to all-ones for this implementation, + * but we do need to zero the remaining bits, if any. + */ + for (i = f->bsize; i < m->bsize_max; i++) + res_map[i] = 0ul; +} + /** * nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation * @net: Network namespace @@ -1170,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, res = scratch->map + (map_index ? m->bsize_max : 0); fill = scratch->map + (map_index ? 0 : m->bsize_max); - /* Starting map doesn't need to be set for this implementation */ + pipapo_resmap_init_avx2(m, res); nft_pipapo_avx2_prepare(); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 0c63d1367cf7a7..a12486ae089d6f 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -621,10 +621,10 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, struct geneve_opt *opt; int offset = 0; - inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); - if (!inner) - goto failure; while (opts->len > offset) { + inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE); + if (!inner) + goto failure; opt = (struct geneve_opt *)(opts->u.data + offset); if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS, opt->opt_class) || @@ -634,8 +634,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, opt->length * 4, opt->opt_data)) goto inner_failure; offset += sizeof(*opt) + opt->length * 4; + nla_nest_end(skb, inner); } - nla_nest_end(skb, inner); } nla_nest_end(skb, nest); return 0; diff --git 
a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 30e99464171b7b..93f064306901c0 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -91,7 +91,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par) return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb)); } -#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static unsigned int tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par) { @@ -119,7 +119,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_tcpoptstrip_target_info), .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IP6_NF_MANGLE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "TCPOPTSTRIP", .family = NFPROTO_IPV6, diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 65b965ca40ea7e..59b9d04400cac2 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -48,7 +48,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { .targetsize = sizeof(struct xt_mark_tginfo2), .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) +#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP) { .name = "MARK", .revision = 2, diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index cd9160bbc91974..33b77084a4e5f3 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1165,6 +1165,11 @@ int netlbl_conn_setattr(struct sock *sk, break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: + if (sk->sk_family != AF_INET6) { + ret_val = -EAFNOSUPPORT; + goto conn_setattr_return; + } + addr6 = (struct sockaddr_in6 *)addr; entry = netlbl_domhsh_getentry_af6(secattr->domain, &addr6->sin6_addr); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2535f3f9f4623b..5481bd561eb414 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -11,7 +11,7 @@ config OPENVSWITCH (!NF_NAT || NF_NAT) && \ 
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) depends on PSAMPLE || !PSAMPLE - select LIBCRC32C + select CRC32 select MPLS select NET_MPLS_GSO select DST_CACHE diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d5c..2f22ca59586f25 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 8a848ce72e2910..b80bd3a9077397 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -788,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ipv4, 0, sizeof(key->ipv4)); } } else if (eth_p_mpls(key->eth.type)) { - u8 label_count = 1; + size_t label_count = 1; memset(&key->mpls, 0, sizeof(key->mpls)); skb_set_inner_network_header(skb, skb->mac_len); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 95e0dd14dc1a32..518be23e48ea93 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2876,7 +2876,8 @@ static int validate_set(const struct nlattr *a, size_t key_len; /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + if (!nla_ok(ovs_key, nla_len(a)) || + nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; key_len = nla_len(ovs_key); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d4dba06297c33e..20be2c47cf4191 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3713,15 +3713,15 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, } static void packet_dev_mclist_delete(struct 
net_device *dev, - struct packet_mclist **mlp) + struct packet_mclist **mlp, + struct list_head *list) { struct packet_mclist *ml; while ((ml = *mlp) != NULL) { if (ml->ifindex == dev->ifindex) { - packet_dev_mc(dev, ml, -1); + list_add(&ml->remove_list, list); *mlp = ml->next; - kfree(ml); } else mlp = &ml->next; } @@ -3769,6 +3769,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) memcpy(i->addr, mreq->mr_address, i->alen); memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); i->count = 1; + INIT_LIST_HEAD(&i->remove_list); i->next = po->mclist; po->mclist = i; err = packet_dev_mc(dev, i, 1); @@ -4233,9 +4234,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { - struct sock *sk; struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); + struct packet_mclist *ml, *tmp; + LIST_HEAD(mclist); + struct sock *sk; rcu_read_lock(); sk_for_each_rcu(sk, &net->packet.sklist) { @@ -4244,7 +4247,8 @@ static int packet_notifier(struct notifier_block *this, switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) - packet_dev_mclist_delete(dev, &po->mclist); + packet_dev_mclist_delete(dev, &po->mclist, + &mclist); fallthrough; case NETDEV_DOWN: @@ -4277,6 +4281,13 @@ static int packet_notifier(struct notifier_block *this, } } rcu_read_unlock(); + + /* packet_dev_mc might grab instance locks so can't run under rcu */ + list_for_each_entry_safe(ml, tmp, &mclist, remove_list) { + packet_dev_mc(dev, ml, -1); + kfree(ml); + } + return NOTIFY_DONE; } diff --git a/net/packet/internal.h b/net/packet/internal.h index d5d70712007ad3..1e743d0316fdda 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -11,6 +11,7 @@ struct packet_mclist { unsigned short type; unsigned short alen; unsigned char addr[MAX_ADDR_LEN]; + struct list_head remove_list; }; /* kbdq - kernel block descriptor queue */ diff --git 
a/net/sched/Kconfig b/net/sched/Kconfig index 8180d0c12fceaf..a800127effcd73 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -784,7 +784,7 @@ config NET_ACT_SKBEDIT config NET_ACT_CSUM tristate "Checksum Updating" depends on NET_CLS_ACT && INET - select LIBCRC32C + select CRC32 help Say Y here to update some common checksum after some direct packet alterations. diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4f648af8cfaafe..ecec0a1e1c1a07 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2057,6 +2057,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); + int ret = -EMSGSIZE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2101,11 +2102,45 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, return skb->len; +cls_op_not_supp: + ret = -EOPNOTSUPP; out_nlmsg_trim: nla_put_failure: -cls_op_not_supp: nlmsg_trim(skb, b); - return -1; + return ret; +} + +static struct sk_buff *tfilter_notify_prep(struct net *net, + struct sk_buff *oskb, + struct nlmsghdr *n, + struct tcf_proto *tp, + struct tcf_block *block, + struct Qdisc *q, u32 parent, + void *fh, int event, + u32 portid, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + unsigned int size = oskb ? 
max(NLMSG_GOODSIZE, oskb->len) : NLMSG_GOODSIZE; + struct sk_buff *skb; + int ret; + +retry: + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOBUFS); + + ret = tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, + n->nlmsg_seq, n->nlmsg_flags, event, false, + rtnl_held, extack); + if (ret <= 0) { + kfree_skb(skb); + if (ret == -EMSGSIZE) { + size += NLMSG_GOODSIZE; + goto retry; + } + return ERR_PTR(-EINVAL); + } + return skb; } static int tfilter_notify(struct net *net, struct sk_buff *oskb, @@ -2121,16 +2156,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event, - false, rtnl_held, extack) <= 0) { - kfree_skb(skb); - return -EINVAL; - } + skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, event, + portid, rtnl_held, extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); if (unicast) err = rtnl_unicast(skb, net, portid); @@ -2153,16 +2182,11 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return tp->ops->delete(tp, fh, last, rtnl_held, extack); - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, - false, rtnl_held, extack) <= 0) { + skb = tfilter_notify_prep(net, oskb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, portid, rtnl_held, extack); + if (IS_ERR(skb)) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); - kfree_skb(skb); - return -EINVAL; + return PTR_ERR(skb); } err = tp->ops->delete(tp, fh, last, rtnl_held, extack); diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 81189d02fee761..c93761040c6e77 
100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -65,10 +65,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch) &q->stats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->stats.drop_count && sch->q.qlen) { + if (q->stats.drop_count) { qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len); q->stats.drop_count = 0; q->stats.drop_len = 0; @@ -147,7 +144,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt, qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index c69b999fae171c..9b6d79bd873712 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -35,6 +35,11 @@ struct drr_sched { struct Qdisc_class_hash clhash; }; +static bool cl_is_active(struct drr_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) { struct drr_sched *q = qdisc_priv(sch); @@ -105,6 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOBUFS; gnet_stats_basic_sync_init(&cl->bstats); + INIT_LIST_HEAD(&cl->alist); cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -229,7 +235,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - list_del(&cl->alist); + list_del_init(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -336,7 +342,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; - bool first; cl = 
drr_classify(skb, sch, &err); if (cl == NULL) { @@ -346,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -356,7 +360,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first) { + if (!cl_is_active(cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } @@ -390,7 +394,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) if (unlikely(skb == NULL)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); @@ -431,7 +435,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { if (cl->qdisc->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); qdisc_reset(cl->qdisc); } } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 516038a4416380..037f764822b965 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -74,6 +74,11 @@ static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = { [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 }, }; +static bool cl_is_active(struct ets_class *cl) +{ + return !list_empty(&cl->alist); +} + static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr, unsigned int *quantum, struct netlink_ext_ack *extack) @@ -293,7 +298,7 @@ static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg) * to remove them. 
*/ if (!ets_class_is_strict(q, cl) && sch->q.qlen) - list_del(&cl->alist); + list_del_init(&cl->alist); } static int ets_class_dump(struct Qdisc *sch, unsigned long arg, @@ -416,7 +421,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct ets_sched *q = qdisc_priv(sch); struct ets_class *cl; int err = 0; - bool first; cl = ets_classify(skb, sch, &err); if (!cl) { @@ -426,7 +430,6 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -436,7 +439,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (first && !ets_class_is_strict(q, cl)) { + if (!cl_is_active(cl) && !ets_class_is_strict(q, cl)) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } @@ -488,7 +491,7 @@ static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch) if (unlikely(!skb)) goto out; if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del_init(&cl->alist); return ets_qdisc_dequeue_skb(sch, skb); } @@ -657,8 +660,8 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) - list_del(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); + list_del_init(&q->classes[i].alist); + qdisc_purge_queue(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); @@ -713,7 +716,7 @@ static void ets_qdisc_reset(struct Qdisc *sch) for (band = q->nstrict; band < q->nbands; band++) { if (q->classes[band].qdisc->q.qlen) - list_del(&q->classes[band].alist); + list_del_init(&q->classes[band].alist); } for (band = 0; band < q->nbands; band++) qdisc_reset(q->classes[band].qdisc); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2ca5332cfcc5c5..902ff54706072b 100644 --- 
a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -1136,7 +1136,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); } while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = fq_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); if (!skb) break; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 799f5397ad4c17..2a0f3a513bfaa1 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -315,10 +315,8 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) } qdisc_bstats_update(sch, skb); flow->deficit -= qdisc_pkt_len(skb); - /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0, - * or HTB crashes. Defer it for next round. - */ - if (q->cstats.drop_count && sch->q.qlen) { + + if (q->cstats.drop_count) { qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); q->cstats.drop_count = 0; @@ -443,7 +441,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, while (sch->q.qlen > sch->limit || q->memory_usage > q->memory_limit) { - struct sk_buff *skb = fq_codel_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); q->cstats.drop_len += qdisc_pkt_len(skb); rtnl_kfree_skbs(skb, skb); diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index f3b8203d3e855b..df7fac95ab1513 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -366,7 +366,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt, /* Drop excess packets if new limit is lower */ while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = fq_pie_qdisc_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); len_dropped += qdisc_pkt_len(skb); num_dropped += 1; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c287bf8423b47b..5a7745170e84b1 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -175,6 +175,11 @@ struct hfsc_sched { #define HT_INFINITY 0xffffffffffffffffULL /* infinite time 
value */ +static bool cl_in_el_or_vttree(struct hfsc_class *cl) +{ + return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) || + ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node)); +} /* * eligible tree holds backlogged classes being sorted by their eligible times. @@ -203,7 +208,10 @@ eltree_insert(struct hfsc_class *cl) static inline void eltree_remove(struct hfsc_class *cl) { - rb_erase(&cl->el_node, &cl->sched->eligible); + if (!RB_EMPTY_NODE(&cl->el_node)) { + rb_erase(&cl->el_node, &cl->sched->eligible); + RB_CLEAR_NODE(&cl->el_node); + } } static inline void @@ -958,6 +966,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl != NULL) { int old_flags; + int len = 0; if (parentid) { if (cl->cl_parent && @@ -988,9 +997,13 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (usc != NULL) hfsc_change_usc(cl, usc, cur_time); + if (cl->qdisc->q.qlen != 0) + len = qdisc_peek_len(cl->qdisc); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ if (cl->qdisc->q.qlen != 0) { - int len = qdisc_peek_len(cl->qdisc); - if (cl->cl_flags & HFSC_RSC) { if (old_flags & HFSC_RSC) update_ed(cl, len); @@ -1032,6 +1045,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; + RB_CLEAR_NODE(&cl->el_node); + err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); @@ -1220,7 +1235,8 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. 
*/ - update_vf(cl, 0, 0); + if (cl->cl_nactive) + update_vf(cl, 0, 0); if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); } @@ -1560,7 +1576,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (first) { + sch->qstats.backlog += len; + sch->q.qlen++; + + if (first && !cl_in_el_or_vttree(cl)) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) @@ -1575,9 +1594,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - sch->qstats.backlog += len; - sch->q.qlen++; - return NET_XMIT_SUCCESS; } @@ -1632,10 +1648,16 @@ hfsc_dequeue(struct Qdisc *sch) if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); - if (realtime) - update_ed(cl, next_len); - else - update_d(cl, next_len); + /* Check queue length again since some qdisc implementations + * (e.g., netem/codel) might empty the queue during the peek + * operation. + */ + if (cl->qdisc->q.qlen != 0) { + if (realtime) + update_ed(cl, next_len); + else + update_d(cl, next_len); + } } else { /* the class becomes passive */ eltree_remove(cl); diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 44d9efe1a96a89..5aa434b4670738 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -564,7 +564,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, qlen = sch->q.qlen; prev_backlog = sch->qstats.backlog; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = hhf_dequeue(sch); + struct sk_buff *skb = qdisc_dequeue_internal(sch, false); rtnl_kfree_skbs(skb, skb); } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index c31bc5489bddc0..14bf71f570570f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -348,7 +348,8 @@ static void htb_add_to_wait_tree(struct htb_sched *q, */ static inline void htb_next_rb_node(struct rb_node **n) { - *n = rb_next(*n); + if (*n) + *n = rb_next(*n); } /** @@ -609,8 +610,8 @@ static inline void htb_activate(struct htb_sched *q, 
struct htb_class *cl) */ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) { - WARN_ON(!cl->prio_activity); - + if (!cl->prio_activity) + return; htb_deactivate_prios(q, cl); cl->prio_activity = 0; } @@ -1738,8 +1739,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->parent) cl->parent->children--; - if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) htb_safe_rb_erase(&cl->pq_node, @@ -1947,8 +1947,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_purge_queue(parent->leaf.q); parent_qdisc = parent->leaf.q; - if (parent->prio_activity) - htb_deactivate(q, parent); + htb_deactivate(q, parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 3771d000b30d08..ff49a6c97033aa 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -195,7 +195,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt, /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + struct sk_buff *skb = qdisc_dequeue_internal(sch, true); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cc30f7a32f1a78..9e2b9a490db23d 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->queues[i]); + qdisc_purge_queue(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 2cfbc977fe6d0b..bf1282cb22ebae 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -202,6 +202,11 @@ 
struct qfq_sched { */ enum update_reason {enqueue, requeue}; +static bool cl_is_active(struct qfq_class *cl) +{ + return !list_empty(&cl->alist); +} + static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) { struct qfq_sched *q = qdisc_priv(sch); @@ -347,7 +352,7 @@ static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) struct qfq_aggregate *agg = cl->agg; - list_del(&cl->alist); /* remove from RR queue of the aggregate */ + list_del_init(&cl->alist); /* remove from RR queue of the aggregate */ if (list_empty(&agg->active)) /* agg is now inactive */ qfq_deactivate_agg(q, agg); } @@ -474,6 +479,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, gnet_stats_basic_sync_init(&cl->bstats); cl->common.classid = classid; cl->deficit = lmax; + INIT_LIST_HEAD(&cl->alist); cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); @@ -982,7 +988,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, cl->deficit -= (int) len; if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */ - list_del(&cl->alist); + list_del_init(&cl->alist); else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) { cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); @@ -1214,7 +1220,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; - bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1236,7 +1241,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; - first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1252,8 +1256,8 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, ++sch->q.qlen; agg = cl->agg; - /* if the queue was not empty, then done here */ - if (!first) { + /* if the class is active, then done here */ + if (cl_is_active(cl)) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) @@ -1415,6 +1419,8 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; + if (list_empty(&cl->alist)) + return; qfq_deactivate_class(q, cl); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1ba3e0bba54f0c..4696c893cf553c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, q->userbits = userbits; q->limit = ctl->limit; if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old_child = q->qdisc; q->qdisc = child; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 9ed197e9639626..77fa02f2bfcd56 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -310,7 +310,10 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. 
*/ x = q->tail->next; slot = &q->slots[x]; - q->tail->next = slot->next; + if (slot->next == x) + q->tail = NULL; /* no more active slots */ + else + q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } @@ -631,6 +634,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, struct red_parms *p = NULL; struct sk_buff *to_free = NULL; struct sk_buff *tail = NULL; + unsigned int maxflows; + unsigned int quantum; + unsigned int divisor; + int perturb_period; + u8 headdrop; + u8 maxdepth; + int limit; + u8 flags; + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; @@ -652,40 +664,65 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, if (!p) return -ENOMEM; } - if (ctl->limit == 1) { - NL_SET_ERR_MSG_MOD(extack, "invalid limit"); - return -EINVAL; - } + sch_tree_lock(sch); + + limit = q->limit; + divisor = q->divisor; + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; + perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ if (ctl->quantum) - q->quantum = ctl->quantum; - WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + quantum = ctl->quantum; + perturb_period = ctl->perturb_period * HZ; if (ctl->flows) - q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { - q->divisor = ctl->divisor; - q->maxflows = min_t(u32, q->maxflows, q->divisor); + divisor = ctl->divisor; + maxflows = min_t(u32, maxflows, divisor); } if (ctl_v1) { if (ctl_v1->depth) - q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); if (p) { - swap(q->red_parms, p); - red_set_parms(q->red_parms, + red_set_parms(p, ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Plog, ctl_v1->Scell_log, NULL, ctl_v1->max_P); } - q->flags = ctl_v1->flags; - q->headdrop = ctl_v1->headdrop; + flags = ctl_v1->flags; + headdrop = 
ctl_v1->headdrop; } if (ctl->limit) { - q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); - q->maxflows = min_t(u32, q->maxflows, q->limit); + limit = min_t(u32, ctl->limit, maxdepth * maxflows); + maxflows = min_t(u32, maxflows, limit); + } + if (limit == 1) { + sch_tree_unlock(sch); + kfree(p); + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; } + /* commit configuration */ + q->limit = limit; + q->divisor = divisor; + q->headdrop = headdrop; + q->maxdepth = maxdepth; + q->maxflows = maxflows; + WRITE_ONCE(q->perturb_period, perturb_period); + q->quantum = quantum; + q->flags = flags; + if (p) + swap(q->red_parms, p); + qlen = sch->q.qlen; while (sch->q.qlen > q->limit) { dropped += sfq_drop(sch, &to_free); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dc26b22d53c734..4c977f049670a6 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 5da599ff84a90f..d18a72df3654eb 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -7,10 +7,10 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET depends on IPV6 || IPV6=n + select CRC32 select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 - select LIBCRC32C select NET_UDP_TUNNEL help Stream Control Transmission Protocol diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 36ee34f483d703..53725ee7ba06d7 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -72,8 +72,9 @@ /* Forward declarations for internal helper functions. 
*/ static bool sctp_writeable(const struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len); +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, + struct sctp_transport *transport, + long *timeo_p, size_t msg_len); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -1828,7 +1829,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, if (sctp_wspace(asoc) <= 0 || !sk_wmem_schedule(sk, msg_len)) { timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + err = sctp_wait_for_sndbuf(asoc, transport, &timeo, msg_len); if (err) goto err; if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { @@ -9214,8 +9215,9 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ -static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len) +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, + struct sctp_transport *transport, + long *timeo_p, size_t msg_len) { struct sock *sk = asoc->base.sk; long current_timeo = *timeo_p; @@ -9225,7 +9227,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, *timeo_p, msg_len); - /* Increment the association's refcnt. */ + /* Increment the transport and association's refcnt. */ + if (transport) + sctp_transport_hold(transport); sctp_association_hold(asoc); /* Wait on the association specific sndbuf space. 
*/ @@ -9234,7 +9238,7 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, TASK_INTERRUPTIBLE); if (asoc->base.dead) goto do_dead; - if (!*timeo_p) + if ((!*timeo_p) || (transport && transport->dead)) goto do_nonblock; if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; @@ -9259,7 +9263,9 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, out: finish_wait(&asoc->wait, &wait); - /* Release the association's refcnt. */ + /* Release the transport and association's refcnt. */ + if (transport) + sctp_transport_put(transport); sctp_association_put(asoc); return err; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 59675f6d9e7d84..6946c14627931d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -117,6 +117,8 @@ struct sctp_transport *sctp_transport_new(struct net *net, */ void sctp_transport_free(struct sctp_transport *transport) { + transport->dead = 1; + /* Try to delete the heartbeat timer. 
*/ if (timer_delete(&transport->hb_timer)) sctp_transport_put(transport); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3e6cb35baf25af..3760131f148450 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -362,6 +362,9 @@ static void smc_destruct(struct sock *sk) return; } +static struct lock_class_key smc_key; +static struct lock_class_key smc_slock_key; + void smc_sk_init(struct net *net, struct sock *sk, int protocol) { struct smc_sock *smc = smc_sk(sk); @@ -375,6 +378,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); INIT_LIST_HEAD(&smc->accept_q); + sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key, + "sk_lock-AF_SMC", &smc_key); spin_lock_init(&smc->accept_q_lock); spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 004cdb59f0106c..7ce5e28a6c0316 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,13 +1536,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. - * - * If someone frequently calls the flush interface, we should - * immediately clean the corresponding cache_detail instead of - * continuously accumulating nextcheck. 
*/ - if (cd->flush_time >= now && cd->flush_time < (now + 5)) + if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index aca8bdf65d729f..ca6172822b68ae 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -406,12 +406,12 @@ static void svc_rdma_xprt_done(struct rpcrdma_notification *rn) */ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) { + unsigned int ctxts, rq_depth, maxpayload; struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; - unsigned int ctxts, rq_depth; struct ib_device *dev; int ret = 0; RPC_IFDEBUG(struct sockaddr *sap); @@ -462,12 +462,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_bc_requests = 2; } - /* Arbitrarily estimate the number of rw_ctxs needed for - * this transport. This is enough rw_ctxs to make forward - * progress even if the client is using one rkey per page - * in each Read chunk. + /* Arbitrary estimate of the needed number of rdma_rw contexts. 
*/ - ctxts = 3 * RPCSVC_MAXPAGES; + maxpayload = min(xprt->xpt_server->sv_max_payload, + RPCSVC_MAXPAYLOAD_RDMA); + ctxts = newxprt->sc_max_requests * 3 * + rdma_rw_mr_factor(dev, newxprt->sc_port_num, + maxpayload >> PAGE_SHIFT); + newxprt->sc_sq_depth = rq_depth + ctxts; if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c524421ec65252..79f91b6ca8c847 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -817,12 +817,20 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, goto exit; } + /* Get net to avoid freed tipc_crypto when delete namespace */ + if (!maybe_get_net(aead->crypto->net)) { + tipc_bearer_put(b); + rc = -ENODEV; + goto exit; + } + /* Now, do encrypt */ rc = crypto_aead_encrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); + put_net(aead->crypto->net); exit: kfree(ctx); @@ -860,6 +868,7 @@ static void tipc_aead_encrypt_done(void *data, int err) kfree(tx_ctx); tipc_bearer_put(b); tipc_aead_put(aead); + put_net(net); } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index 50c2e0846ea4df..18be6ff4c3db0a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1046,6 +1046,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); + __skb_queue_purge(list); return -ENOBUFS; } rc = link_schedule_user(l, hdr); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index e2f19627e43d52..b45c5b91bc7afb 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -716,7 +716,8 @@ void tipc_mon_reinit_self(struct net *net) if (!mon) continue; write_lock_bh(&mon->lock); - mon->self->addr = tipc_own_addr(net); + if (mon->self) + mon->self->addr = tipc_own_addr(net); write_unlock_bh(&mon->lock); } } diff --git a/net/tls/tls_main.c 
b/net/tls/tls_main.c index cb86b0bf9a53e1..a3ccb3135e51ac 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -852,6 +852,11 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, return do_tls_setsockopt(sk, optname, optval, optlen); } +static int tls_disconnect(struct sock *sk, int flags) +{ + return -EOPNOTSUPP; +} + struct tls_context *tls_ctx_create(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -947,6 +952,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_BASE] = *base; prot[TLS_BASE][TLS_BASE].setsockopt = tls_setsockopt; prot[TLS_BASE][TLS_BASE].getsockopt = tls_getsockopt; + prot[TLS_BASE][TLS_BASE].disconnect = tls_disconnect; prot[TLS_BASE][TLS_BASE].close = tls_sk_proto_close; prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index 77e33e1e340e31..65b0da6fdf6a79 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -396,7 +396,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) return 0; shinfo = skb_shinfo(strp->anchor); - shinfo->frag_list = NULL; /* If we don't know the length go max plus page for cipher overhead */ need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE; @@ -412,6 +411,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort) page, 0, 0); } + shinfo->frag_list = NULL; + strp->copy_mode = 1; strp->stm.offset = 0; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 914d4e1516a3cd..fc88e34b7f33fe 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -908,6 +908,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, &msg_redir, send, flags); lock_sock(sk); if (err < 0) { + /* Regardless of whether the data represented by + * msg_redir is sent successfully, we have already + * uncharged it via sk_msg_return_zero(). 
The + * msg->sg.size represents the remaining unprocessed + * data, which needs to be uncharged here. + */ + sk_mem_uncharge(sk, msg->sg.size); *copied -= sk_msg_free_nocharge(sk, &msg_redir); msg->sg.size = 0; } @@ -1120,9 +1127,13 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, num_async++; else if (ret == -ENOMEM) goto wait_for_memory; - else if (ctx->open_rec && ret == -ENOSPC) + else if (ctx->open_rec && ret == -ENOSPC) { + if (msg_pl->cork_bytes) { + ret = 0; + goto send_end; + } goto rollback_iter; - else if (ret != -EAGAIN) + } else if (ret != -EAGAIN) goto send_end; } continue; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 7f7de6d8809655..2c9b1011cdcc80 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -441,18 +441,20 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - if (vvs->rx_bytes + len > vvs->buf_alloc) + if (vvs->buf_used + len > vvs->buf_alloc) return false; vvs->rx_bytes += len; + vvs->buf_used += len; return true; } static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, - u32 len) + u32 bytes_read, u32 bytes_dequeued) { - vvs->rx_bytes -= len; - vvs->fwd_cnt += len; + vvs->rx_bytes -= bytes_read; + vvs->buf_used -= bytes_dequeued; + vvs->fwd_cnt += bytes_dequeued; } void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb) @@ -581,11 +583,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, size_t len) { struct virtio_vsock_sock *vvs = vsk->trans; - size_t bytes, total = 0; struct sk_buff *skb; u32 fwd_cnt_delta; bool low_rx_bytes; int err = -EFAULT; + size_t total = 0; u32 free_space; spin_lock_bh(&vvs->rx_lock); @@ -597,6 +599,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, } while (total < len && !skb_queue_empty(&vvs->rx_queue)) { + size_t 
bytes, dequeued = 0; + skb = skb_peek(&vvs->rx_queue); bytes = min_t(size_t, len - total, @@ -620,12 +624,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { - u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); - - virtio_transport_dec_rx_pkt(vvs, pkt_len); + dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len); __skb_unlink(skb, &vvs->rx_queue); consume_skb(skb); } + + virtio_transport_dec_rx_pkt(vvs, bytes, dequeued); } fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; @@ -781,7 +785,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, msg->msg_flags |= MSG_EOR; } - virtio_transport_dec_rx_pkt(vvs, pkt_len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); kfree_skb(skb); } @@ -1735,6 +1739,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto struct sock *sk = sk_vsock(vsk); struct virtio_vsock_hdr *hdr; struct sk_buff *skb; + u32 pkt_len; int off = 0; int err; @@ -1752,7 +1757,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count--; - virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len)); + pkt_len = le32_to_cpu(hdr->len); + virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len); spin_unlock_bh(&vvs->rx_lock); virtio_transport_send_credit_update(vsk); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9865f305275da6..e8a4fe44ec2d80 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2681,7 +2681,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; @@ -3250,6 +3250,7 @@ 
cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const u8 *ie; size_t ielen; u64 tsf; + size_t s1g_optional_len; if (WARN_ON(!mgmt)) return NULL; @@ -3264,12 +3265,11 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_short_beacon.variable); - else - min_hdr_len = offsetof(struct ieee80211_ext, - u.s1g_beacon.variable); + s1g_optional_len = + ieee80211_s1g_optional_len(ext->frame_control); + min_hdr_len = + offsetof(struct ieee80211_ext, u.s1g_beacon.variable) + + s1g_optional_len; } else { /* same for beacons */ min_hdr_len = offsetof(struct ieee80211_mgmt, @@ -3285,11 +3285,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct ieee80211_s1g_bcn_compat_ie *compat; const struct element *elem; - if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) - ie = ext->u.s1g_short_beacon.variable; - else - ie = ext->u.s1g_beacon.variable; - + ie = ext->u.s1g_beacon.variable + s1g_optional_len; elem = cfg80211_find_elem(WLAN_EID_S1G_BCN_COMPAT, ie, ielen); if (!elem) return NULL; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 5696af45bcf711..72c000c0ae5f57 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -338,13 +338,14 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) u32 len = xdp_get_buff_len(xdp); int err; - spin_lock_bh(&xs->rx_lock); err = xsk_rcv_check(xs, xdp, len); if (!err) { + spin_lock_bh(&xs->pool->rx_lock); err = __xsk_rcv(xs, xdp, len); xsk_flush(xs); + spin_unlock_bh(&xs->pool->rx_lock); } - spin_unlock_bh(&xs->rx_lock); + return err; } @@ -1303,7 +1304,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); - if (xs->zc && qid < dev->real_num_rx_queues) { + if (qid < dev->real_num_rx_queues) { struct netdev_rx_queue *rxq; rxq = __netif_get_rx_queue(dev, qid); @@ 
-1734,7 +1735,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs = xdp_sk(sk); xs->state = XSK_READY; mutex_init(&xs->mutex); - spin_lock_init(&xs->rx_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 25a76c5ce0f128..c5181a9044add5 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -89,6 +89,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; + spin_lock_init(&pool->rx_lock); INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fe82e2d073006e..fc7a603b04f130 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -171,8 +171,10 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb) struct espintcp_ctx *ctx = espintcp_getctx(sk); if (skb_queue_len(&ctx->out_queue) >= - READ_ONCE(net_hotdata.max_backlog)) + READ_ONCE(net_hotdata.max_backlog)) { + kfree_skb(skb); return -ENOBUFS; + } __skb_queue_tail(&ctx->out_queue, skb); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index d62f76161d83e2..f46a9e5764f014 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -314,7 +314,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC); - xso->real_dev = dev; if (xuo->flags & XFRM_OFFLOAD_INBOUND) xso->dir = XFRM_DEV_OFFLOAD_IN; @@ -326,11 +325,10 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, else xso->type = XFRM_DEV_OFFLOAD_CRYPTO; - err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack); + err = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, extack); if (err) { xso->dev = NULL; xso->dir = 0; - xso->real_dev = NULL; netdev_put(dev, 
&xso->dev_tracker); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; @@ -378,7 +376,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, xdo->dev = dev; netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC); - xdo->real_dev = dev; xdo->type = XFRM_DEV_OFFLOAD_PACKET; switch (dir) { case XFRM_POLICY_IN: @@ -400,7 +397,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack); if (err) { xdo->dev = NULL; - xdo->real_dev = NULL; xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; xdo->dir = 0; netdev_put(dev, &xdo->dev_tracker); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 0c14205343941d..907c3ccb440dab 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -48,7 +48,6 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) { struct acomp_req *req = ipcomp_cb(skb)->req; struct ipcomp_req_extra *extra; - const int plen = skb->data_len; struct scatterlist *dsg; int len, dlen; @@ -64,7 +63,7 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) /* Only update truesize on input. 
*/ if (!hlen) - skb->truesize += dlen - plen; + skb->truesize += dlen; skb->data_len = dlen; skb->len += dlen; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 143ac3aa753746..f4bad8c895d668 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1581,6 +1581,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct xfrm_policy *delpol; struct hlist_head *chain; + /* Sanitize mark before store */ + policy->mark.v &= policy->mark.m; + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 341d79ecb5c21c..5ece039846e201 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -767,7 +767,7 @@ void xfrm_dev_state_delete(struct xfrm_state *x) struct net_device *dev = READ_ONCE(xso->dev); if (dev) { - dev->xfrmdev_ops->xdo_dev_state_delete(x); + dev->xfrmdev_ops->xdo_dev_state_delete(dev, x); spin_lock_bh(&xfrm_state_dev_gc_lock); hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list); spin_unlock_bh(&xfrm_state_dev_gc_lock); @@ -789,7 +789,7 @@ void xfrm_dev_state_free(struct xfrm_state *x) spin_unlock_bh(&xfrm_state_dev_gc_lock); if (dev->xfrmdev_ops->xdo_dev_state_free) - dev->xfrmdev_ops->xdo_dev_state_free(x); + dev->xfrmdev_ops->xdo_dev_state_free(dev, x); WRITE_ONCE(xso->dev, NULL); xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; netdev_put(dev, &xso->dev_tracker); @@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x) xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); - if (x->encap_sk) - sock_put(rcu_dereference_raw(x->encap_sk)); - xfrm_dev_state_delete(x); /* All xfrm_state objects are created by xfrm_state_alloc. 
@@ -1551,19 +1548,19 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { struct xfrm_dev_offload *xdo = &pol->xdo; struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = xdo->dev; xso->type = XFRM_DEV_OFFLOAD_PACKET; xso->dir = xdo->dir; - xso->dev = xdo->dev; - xso->real_dev = xdo->real_dev; + xso->dev = dev; xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ; - netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC); - error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL); + netdev_hold(dev, &xso->dev_tracker, GFP_ATOMIC); + error = dev->xfrmdev_ops->xdo_dev_state_add(dev, x, + NULL); if (error) { xso->dir = 0; - netdev_put(xso->dev, &xso->dev_tracker); + netdev_put(dev, &xso->dev_tracker); xso->dev = NULL; - xso->real_dev = NULL; xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -1721,6 +1718,9 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->km.all, &net->xfrm.state_all); + /* Sanitize mark before store */ + x->mark.v &= x->mark.m; + h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h, diff --git a/rust/Makefile b/rust/Makefile index fa0eea8a9eca22..313a200112ce18 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -60,6 +60,8 @@ endif core-cfgs = \ --cfg no_fp_fmt_parse +core-edition := $(if $(call rustc-min-version,108700),2024,2021) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both @@ -106,8 +108,8 @@ rustdoc-macros: $(src)/macros/lib.rs FORCE # Starting with Rust 1.82.0, skipping `-Wrustdoc::unescaped_backticks` should # not be needed -- see https://github.com/rust-lang/rust/pull/128307. 
-rustdoc-core: private skip_flags = -Wrustdoc::unescaped_backticks -rustdoc-core: private rustc_target_flags = $(core-cfgs) +rustdoc-core: private skip_flags = --edition=2021 -Wrustdoc::unescaped_backticks +rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs FORCE +$(call if_changed,rustdoc) @@ -368,7 +370,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) -rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }' +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__(pfx|cfi|odr_asan)/ { printf $(2),$$3 }' quiet_cmd_exports = EXPORTS $@ cmd_exports = \ @@ -416,7 +418,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L cmd_rustc_library = \ OBJTREE=$(abspath $(objtree)) \ $(if $(skip_clippy),$(RUSTC),$(RUSTC_OR_CLIPPY)) \ - $(filter-out $(skip_flags),$(rust_flags) $(rustc_target_flags)) \ + $(filter-out $(skip_flags),$(rust_flags)) $(rustc_target_flags) \ --emit=dep-info=$(depfile) --emit=obj=$@ \ --emit=metadata=$(dir $@)$(patsubst %.o,lib%.rmeta,$(notdir $@)) \ --crate-type rlib -L$(objtree)/$(obj) \ @@ -427,7 +429,7 @@ quiet_cmd_rustc_library = $(if $(skip_clippy),RUSTC,$(RUSTC_OR_CLIPPY_QUIET)) L rust-analyzer: $(Q)MAKEFLAGS= $(srctree)/scripts/generate_rust_analyzer.py \ - --cfgs='core=$(core-cfgs)' \ + --cfgs='core=$(core-cfgs)' $(core-edition) \ $(realpath $(srctree)) $(realpath $(objtree)) \ $(rustc_sysroot) $(RUST_LIB_SRC) $(if $(KBUILD_EXTMOD),$(srcroot)) \ > rust-project.json @@ -483,9 +485,9 @@ $(obj)/helpers/helpers.o: $(src)/helpers/helpers.c $(recordmcount_source) FORCE $(obj)/exports.o: private skip_gendwarfksyms = 1 $(obj)/core.o: private skip_clippy = 1 -$(obj)/core.o: private skip_flags = -Wunreachable_pub 
+$(obj)/core.o: private skip_flags = --edition=2021 -Wunreachable_pub $(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) -$(obj)/core.o: private rustc_target_flags = $(core-cfgs) +$(obj)/core.o: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs) $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs \ $(wildcard $(objtree)/include/config/RUSTC_VERSION_TEXT) FORCE +$(call if_changed_rule,rustc_library) diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 014af0d1fc70cb..a08eb5518cac5d 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -26,6 +26,7 @@ #[allow(dead_code)] #[allow(clippy::undocumented_unsafe_blocks)] +#[cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] mod bindings_raw { // Manual definition for blocklisted types. type __kernel_size_t = usize; diff --git a/rust/helpers/dma.c b/rust/helpers/dma.c new file mode 100644 index 00000000000000..df8b8a77355a32 --- /dev/null +++ b/rust/helpers/dma.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +void *rust_helper_dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + unsigned long attrs) +{ + return dma_alloc_attrs(dev, size, dma_handle, flag, attrs); +} + +void rust_helper_dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + dma_free_attrs(dev, size, cpu_addr, dma_handle, attrs); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index e1c21eba9b15b6..1e7c84df725211 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -14,6 +14,7 @@ #include "cpumask.c" #include "cred.c" #include "device.c" +#include "dma.c" #include "err.c" #include "fs.c" #include "io.c" diff --git a/rust/helpers/io.c b/rust/helpers/io.c index 4c2401ccd72078..15ea187c546625 100644 --- a/rust/helpers/io.c +++ b/rust/helpers/io.c @@ -7,94 +7,94 @@ void __iomem 
*rust_helper_ioremap(phys_addr_t offset, size_t size) return ioremap(offset, size); } -void rust_helper_iounmap(volatile void __iomem *addr) +void rust_helper_iounmap(void __iomem *addr) { iounmap(addr); } -u8 rust_helper_readb(const volatile void __iomem *addr) +u8 rust_helper_readb(const void __iomem *addr) { return readb(addr); } -u16 rust_helper_readw(const volatile void __iomem *addr) +u16 rust_helper_readw(const void __iomem *addr) { return readw(addr); } -u32 rust_helper_readl(const volatile void __iomem *addr) +u32 rust_helper_readl(const void __iomem *addr) { return readl(addr); } #ifdef CONFIG_64BIT -u64 rust_helper_readq(const volatile void __iomem *addr) +u64 rust_helper_readq(const void __iomem *addr) { return readq(addr); } #endif -void rust_helper_writeb(u8 value, volatile void __iomem *addr) +void rust_helper_writeb(u8 value, void __iomem *addr) { writeb(value, addr); } -void rust_helper_writew(u16 value, volatile void __iomem *addr) +void rust_helper_writew(u16 value, void __iomem *addr) { writew(value, addr); } -void rust_helper_writel(u32 value, volatile void __iomem *addr) +void rust_helper_writel(u32 value, void __iomem *addr) { writel(value, addr); } #ifdef CONFIG_64BIT -void rust_helper_writeq(u64 value, volatile void __iomem *addr) +void rust_helper_writeq(u64 value, void __iomem *addr) { writeq(value, addr); } #endif -u8 rust_helper_readb_relaxed(const volatile void __iomem *addr) +u8 rust_helper_readb_relaxed(const void __iomem *addr) { return readb_relaxed(addr); } -u16 rust_helper_readw_relaxed(const volatile void __iomem *addr) +u16 rust_helper_readw_relaxed(const void __iomem *addr) { return readw_relaxed(addr); } -u32 rust_helper_readl_relaxed(const volatile void __iomem *addr) +u32 rust_helper_readl_relaxed(const void __iomem *addr) { return readl_relaxed(addr); } #ifdef CONFIG_64BIT -u64 rust_helper_readq_relaxed(const volatile void __iomem *addr) +u64 rust_helper_readq_relaxed(const void __iomem *addr) { return readq_relaxed(addr); 
} #endif -void rust_helper_writeb_relaxed(u8 value, volatile void __iomem *addr) +void rust_helper_writeb_relaxed(u8 value, void __iomem *addr) { writeb_relaxed(value, addr); } -void rust_helper_writew_relaxed(u16 value, volatile void __iomem *addr) +void rust_helper_writew_relaxed(u16 value, void __iomem *addr) { writew_relaxed(value, addr); } -void rust_helper_writel_relaxed(u32 value, volatile void __iomem *addr) +void rust_helper_writel_relaxed(u32 value, void __iomem *addr) { writel_relaxed(value, addr); } #ifdef CONFIG_64BIT -void rust_helper_writeq_relaxed(u64 value, volatile void __iomem *addr) +void rust_helper_writeq_relaxed(u64 value, void __iomem *addr) { writeq_relaxed(value, addr); } diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index ae9d072741cedb..f62204fe563f58 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -2,6 +2,9 @@ //! Implementation of [`Vec`]. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use super::{ allocator::{KVmalloc, Kmalloc, Vmalloc}, layout::ArrayLayout, @@ -193,6 +196,9 @@ where #[inline] pub unsafe fn set_len(&mut self, new_len: usize) { debug_assert!(new_len <= self.capacity()); + + // INVARIANT: By the safety requirements of this method `new_len` represents the exact + // number of elements stored within `self`. self.len = new_len; } diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index f04b058b09b2d2..2494c96e105f3a 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -4,7 +4,7 @@ //! //! 
C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) -use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; +use crate::{bindings, device::Device, error::Error, error::Result, ffi, str::CStr}; use core::ptr::NonNull; /// # Invariants @@ -12,7 +12,11 @@ use core::ptr::NonNull; /// One of the following: `bindings::request_firmware`, `bindings::firmware_request_nowarn`, /// `bindings::firmware_request_platform`, `bindings::request_firmware_direct`. struct FwFunc( - unsafe extern "C" fn(*mut *const bindings::firmware, *const u8, *mut bindings::device) -> i32, + unsafe extern "C" fn( + *mut *const bindings::firmware, + *const ffi::c_char, + *mut bindings::device, + ) -> i32, ); impl FwFunc { diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs index 13a0e44cd1aa81..138693bdeb3fdf 100644 --- a/rust/kernel/fs/file.rs +++ b/rust/kernel/fs/file.rs @@ -219,6 +219,7 @@ unsafe impl AlwaysRefCounted for File { /// must be on the same thread as this file. /// /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +#[repr(transparent)] pub struct LocalFile { inner: Opaque, } diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs index a335c3b1ff5e48..2054682c5724ce 100644 --- a/rust/kernel/list.rs +++ b/rust/kernel/list.rs @@ -4,6 +4,9 @@ //! A linked list implementation. +// May not be needed in Rust 1.87.0 (pending beta backport). +#![allow(clippy::ptr_eq)] + use crate::sync::ArcBorrow; use crate::types::Opaque; use core::iter::{DoubleEndedIterator, FusedIterator}; diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs index 13c50df37b89d1..a88a2dc65aa7cf 100644 --- a/rust/kernel/list/arc.rs +++ b/rust/kernel/list/arc.rs @@ -96,7 +96,7 @@ macro_rules! impl_list_arc_safe { } $($rest:tt)*) => { impl$(<$($generics)*>)? 
$crate::list::ListArcSafe<$num> for $t { unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) { - $crate::assert_pinned!($t, $field, $fty, inline); + ::pin_init::assert_pinned!($t, $field, $fty, inline); // SAFETY: This field is structurally pinned as per the above assertion. let field = unsafe { diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs index fa9ecc42602a47..15d10e5c1db7da 100644 --- a/rust/kernel/miscdevice.rs +++ b/rust/kernel/miscdevice.rs @@ -121,7 +121,7 @@ pub trait MiscDevice: Sized { /// Handler for ioctls. /// - /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`]. + /// The `cmd` argument is usually manipulated using the utilities in [`kernel::ioctl`]. /// /// [`kernel::ioctl`]: mod@crate::ioctl fn ioctl( diff --git a/rust/kernel/pci.rs b/rust/kernel/pci.rs index c97d6d470b2822..bbc453c6d9ea88 100644 --- a/rust/kernel/pci.rs +++ b/rust/kernel/pci.rs @@ -118,7 +118,9 @@ macro_rules! module_pci_driver { }; } -/// Abstraction for bindings::pci_device_id. +/// Abstraction for the PCI device ID structure ([`struct pci_device_id`]). +/// +/// [`struct pci_device_id`]: https://docs.kernel.org/PCI/pci.html#c.pci_device_id #[repr(transparent)] #[derive(Clone, Copy)] pub struct DeviceId(bindings::pci_device_id); @@ -173,7 +175,7 @@ unsafe impl RawDeviceId for DeviceId { } } -/// IdTable type for PCI +/// `IdTable` type for PCI. pub type IdTable = &'static dyn kernel::device_id::IdTable; /// Create a PCI `IdTable` with its alias for modpost. @@ -224,10 +226,11 @@ macro_rules! pci_device_table { /// `Adapter` documentation for an example. pub trait Driver: Send { /// The type holding information about each device id supported by the driver. 
- /// - /// TODO: Use associated_type_defaults once stabilized: - /// - /// type IdInfo: 'static = (); + // TODO: Use `associated_type_defaults` once stabilized: + // + // ``` + // type IdInfo: 'static = (); + // ``` type IdInfo: 'static; /// The table of device ids supported by the driver. diff --git a/rust/kernel/str.rs b/rust/kernel/str.rs index 878111cb77bc84..fb61ce81ea2868 100644 --- a/rust/kernel/str.rs +++ b/rust/kernel/str.rs @@ -73,7 +73,7 @@ impl fmt::Display for BStr { b'\r' => f.write_str("\\r")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } Ok(()) @@ -109,7 +109,7 @@ impl fmt::Debug for BStr { b'\\' => f.write_str("\\\\")?, // Printable characters. 0x20..=0x7e => f.write_char(b as char)?, - _ => write!(f, "\\x{:02x}", b)?, + _ => write!(f, "\\x{b:02x}")?, } } f.write_char('"') @@ -447,7 +447,7 @@ impl fmt::Display for CStr { // Printable character. f.write_char(c as char)?; } else { - write!(f, "\\x{:02x}", c)?; + write!(f, "\\x{c:02x}")?; } } Ok(()) @@ -479,7 +479,7 @@ impl fmt::Debug for CStr { // Printable characters. 
b'\"' => f.write_str("\\\"")?, 0x20..=0x7e => f.write_char(c as char)?, - _ => write!(f, "\\x{:02x}", c)?, + _ => write!(f, "\\x{c:02x}")?, } } f.write_str("\"") @@ -641,13 +641,13 @@ mod tests { #[test] fn test_cstr_display() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a"); + assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] @@ -658,47 +658,47 @@ mod tests { bytes[i as usize] = i.wrapping_add(1); } let cstr = CStr::from_bytes_with_nul(&bytes).unwrap(); - assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS); + assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); } #[test] fn test_cstr_debug() { let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); - assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\""); + assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); - 
assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } #[test] fn test_bstr_display() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{}", hello_world), "hello, world!"); + assert_eq!(format!("{hello_world}"), "hello, world!"); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_"); + assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{}", others), "\\x01"); + assert_eq!(format!("{others}"), "\\x01"); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); + assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); + assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); } #[test] fn test_bstr_debug() { let hello_world = BStr::from_bytes(b"hello, world!"); - assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); + assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); - assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); + assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); let others = BStr::from_bytes(b"\x01"); - assert_eq!(format!("{:?}", others), "\"\\x01\""); + assert_eq!(format!("{others:?}"), "\"\\x01\""); let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); - assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); + assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); - assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); + assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); } } diff --git a/rust/macros/kunit.rs b/rust/macros/kunit.rs 
index 4f553ecf40c0a7..99ccac82edde3a 100644 --- a/rust/macros/kunit.rs +++ b/rust/macros/kunit.rs @@ -15,10 +15,7 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { } if attr.len() > 255 { - panic!( - "The test suite name `{}` exceeds the maximum length of 255 bytes", - attr - ) + panic!("The test suite name `{attr}` exceeds the maximum length of 255 bytes") } let mut tokens: Vec<_> = ts.into_iter().collect(); @@ -102,16 +99,14 @@ pub(crate) fn kunit_tests(attr: TokenStream, ts: TokenStream) -> TokenStream { let mut kunit_macros = "".to_owned(); let mut test_cases = "".to_owned(); for test in &tests { - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); let kunit_wrapper = format!( - "unsafe extern \"C\" fn {}(_test: *mut kernel::bindings::kunit) {{ {}(); }}", - kunit_wrapper_fn_name, test + "unsafe extern \"C\" fn {kunit_wrapper_fn_name}(_test: *mut kernel::bindings::kunit) {{ {test}(); }}" ); writeln!(kunit_macros, "{kunit_wrapper}").unwrap(); writeln!( test_cases, - " kernel::kunit::kunit_case(kernel::c_str!(\"{}\"), {}),", - test, kunit_wrapper_fn_name + " kernel::kunit::kunit_case(kernel::c_str!(\"{test}\"), {kunit_wrapper_fn_name})," ) .unwrap(); } diff --git a/rust/macros/module.rs b/rust/macros/module.rs index a9418fbc9b4453..2f66107847f785 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -48,7 +48,7 @@ impl<'a> ModInfoBuilder<'a> { ) } else { // Loadable modules' modinfo strings go as-is. - format!("{field}={content}\0", field = field, content = content) + format!("{field}={content}\0") }; write!( @@ -126,10 +126,7 @@ impl ModuleInfo { }; if seen_keys.contains(&key) { - panic!( - "Duplicated key \"{}\". Keys can only be specified once.", - key - ); + panic!("Duplicated key \"{key}\". 
Keys can only be specified once."); } assert_eq!(expect_punct(it), ':'); @@ -143,10 +140,7 @@ impl ModuleInfo { "license" => info.license = expect_string_ascii(it), "alias" => info.alias = Some(expect_string_array(it)), "firmware" => info.firmware = Some(expect_string_array(it)), - _ => panic!( - "Unknown key \"{}\". Valid keys are: {:?}.", - key, EXPECTED_KEYS - ), + _ => panic!("Unknown key \"{key}\". Valid keys are: {EXPECTED_KEYS:?}."), } assert_eq!(expect_punct(it), ','); @@ -158,7 +152,7 @@ impl ModuleInfo { for key in REQUIRED_KEYS { if !seen_keys.iter().any(|e| e == key) { - panic!("Missing required key \"{}\".", key); + panic!("Missing required key \"{key}\"."); } } @@ -170,10 +164,7 @@ impl ModuleInfo { } if seen_keys != ordered_keys { - panic!( - "Keys are not ordered as expected. Order them like: {:?}.", - ordered_keys - ); + panic!("Keys are not ordered as expected. Order them like: {ordered_keys:?}."); } info diff --git a/rust/macros/paste.rs b/rust/macros/paste.rs index 6529a387673fb5..cce712d19855b5 100644 --- a/rust/macros/paste.rs +++ b/rust/macros/paste.rs @@ -50,7 +50,7 @@ fn concat_helper(tokens: &[TokenTree]) -> Vec<(String, Span)> { let tokens = group.stream().into_iter().collect::>(); segments.append(&mut concat_helper(tokens.as_slice())); } - token => panic!("unexpected token in paste segments: {:?}", token), + token => panic!("unexpected token in paste segments: {token:?}"), }; } diff --git a/rust/pin-init/examples/pthread_mutex.rs b/rust/pin-init/examples/pthread_mutex.rs index 9164298c44c021..5ac22f1880d2f7 100644 --- a/rust/pin-init/examples/pthread_mutex.rs +++ b/rust/pin-init/examples/pthread_mutex.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -// inspired by https://github.com/nbdd0121/pin-init/blob/trunk/examples/pthread_mutex.rs +// inspired by #![allow(clippy::undocumented_unsafe_blocks)] #![cfg_attr(feature = "alloc", feature(allocator_api))] #[cfg(not(windows))] diff --git 
a/rust/pin-init/internal/src/pinned_drop.rs b/rust/pin-init/internal/src/pinned_drop.rs index c824dd8b436dfb..c4ca7a70b726a5 100644 --- a/rust/pin-init/internal/src/pinned_drop.rs +++ b/rust/pin-init/internal/src/pinned_drop.rs @@ -28,8 +28,7 @@ pub(crate) fn pinned_drop(_args: TokenStream, input: TokenStream) -> TokenStream // Found the end of the generics, this should be `PinnedDrop`. assert!( matches!(tt, TokenTree::Ident(i) if i.to_string() == "PinnedDrop"), - "expected 'PinnedDrop', found: '{:?}'", - tt + "expected 'PinnedDrop', found: '{tt:?}'" ); pinned_drop_idx = Some(i); break; diff --git a/rust/pin-init/src/alloc.rs b/rust/pin-init/src/alloc.rs index e16baa3b434e08..5017f57442d862 100644 --- a/rust/pin-init/src/alloc.rs +++ b/rust/pin-init/src/alloc.rs @@ -17,11 +17,9 @@ use crate::{ pub extern crate alloc; -// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee). -// -// In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant and there -// is no problem with a VTABLE pointer being null. -unsafe impl ZeroableOption for Box {} +// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: +// ). +unsafe impl ZeroableOption for Box {} /// Smart pointer that can initialize memory in-place. pub trait InPlaceInit: Sized { diff --git a/rust/pin-init/src/lib.rs b/rust/pin-init/src/lib.rs index 05c44514765ef8..0806c689f693c1 100644 --- a/rust/pin-init/src/lib.rs +++ b/rust/pin-init/src/lib.rs @@ -1447,7 +1447,7 @@ impl_zeroable! { {} UnsafeCell, // SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee: - // https://doc.rust-lang.org/stable/std/option/index.html#representation). + // ). 
Option, Option, Option, Option, Option, Option, Option, Option, Option, Option, diff --git a/rust/uapi/lib.rs b/rust/uapi/lib.rs index 13495910271faf..c98d7a8cde77da 100644 --- a/rust/uapi/lib.rs +++ b/rust/uapi/lib.rs @@ -24,6 +24,7 @@ unreachable_pub, unsafe_op_in_unsafe_fn )] +#![cfg_attr(CONFIG_RUSTC_HAS_UNNECESSARY_TRANSMUTES, allow(unnecessary_transmutes))] // Manual definition of blocklisted types. type __kernel_size_t = usize; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5b632635e00dde..95a4fa1f1e4474 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index dac67c3674576d..4147616102f906 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -112,7 +112,7 @@ static int __init sample_trace_array_init(void) /* * If context specific per-cpu buffers havent already been allocated. 
*/ - trace_printk_init_buffers(); + trace_array_init_printk(tr); simple_tsk = kthread_run(simple_thread, NULL, "sample-instance"); if (IS_ERR(simple_tsk)) { diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c index 69105596e72e68..fadc2a85cb35dd 100644 --- a/samples/livepatch/livepatch-callbacks-busymod.c +++ b/samples/livepatch/livepatch-callbacks-busymod.c @@ -56,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 11c3f4357812db..9e69d9caed258e 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -192,5 +192,6 @@ static void livepatch_callbacks_demo_exit(void) module_init(livepatch_callbacks_demo_init); module_exit(livepatch_callbacks_demo_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c index 2a074f422a51d9..d1851b471ad915 100644 --- a/samples/livepatch/livepatch-callbacks-mod.c +++ b/samples/livepatch/livepatch-callbacks-mod.c @@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index cd76d7ebe59859..5263a2f31c480e 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -66,5 +66,6 @@ static void livepatch_exit(void) 
module_init(livepatch_init); module_exit(livepatch_exit); +MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index f3f153895d6ce7..cbf68ca4009732 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -168,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void) module_init(livepatch_shadow_fix1_init); module_exit(livepatch_shadow_fix1_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index 361046a4f10cf4..b99122cb221fc4 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void) module_init(livepatch_shadow_fix2_init); module_exit(livepatch_shadow_fix2_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 13dcd86e74ca83..338e1aec0eaa3f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -50,18 +50,23 @@ endif # =========================================================================== +builtin_suffix := $(if $(filter %.a_thinlto_native, $(MAKECMDGOALS)),.a_thinlto_native,.a) +ifeq ($(thinlto_final_pass),1) +builtin_suffix :=.a_thinlto_native +endif + # subdir-builtin and subdir-modorder may contain duplications. Use $(sort ...) 
-subdir-builtin := $(sort $(filter %/built-in.a, $(real-obj-y))) +subdir-builtin := $(sort $(filter %/built-in$(builtin_suffix), $(real-obj-y))) subdir-modorder := $(sort $(filter %/modules.order, $(obj-m))) targets-for-builtin := $(extra-y) ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),) -targets-for-builtin += $(obj)/lib.a +targets-for-builtin += $(obj)/lib$(builtin_suffix) endif ifdef need-builtin -targets-for-builtin += $(obj)/built-in.a +targets-for-builtin += $(obj)/built-in$(builtin_suffix) endif targets-for-modules := $(foreach x, o mod, \ @@ -337,6 +342,10 @@ $(obj)/%.o: $(obj)/%.S FORCE targets += $(filter-out $(subdir-builtin), $(real-obj-y)) targets += $(filter-out $(subdir-modorder), $(real-obj-m)) targets += $(lib-y) $(always-y) +ifeq ($(builtin_suffix),.a_thinlto_native) +native_targets = $(patsubst,%.o,%.o_thinlto_native,$(targets)) +targets += $(native_targets) +endif # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- @@ -347,6 +356,24 @@ quiet_cmd_cpp_lds_S = LDS $@ $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Generate .o_thinlto_native (obj) from .o (bitcode) file +# --------------------------------------------------------------------------- +quiet_cmd_cc_o_bc = CC $(quiet_modtag) $@ + +cmd_cc_o_bc = $(if $(filter bitcode, $(shell file -b $<)),$(CC) \ + $(filter-out -Wp% $(LINUXINCLUDE) %.h.gch %.h -D% \ + -flto=thin, $(c_flags)) \ + -Wno-unused-command-line-argument \ + -x ir -fthinlto-index=$<.thinlto.bc -c -o $@ \ + $(if $(findstring ../,$<), \ + $$(realpath --relative-to=$(srcroot) $<), $<), \ + cp $< $@) + +$(obj)/%.o_thinlto_native: $(obj)/%.o FORCE + $(call if_changed,cc_o_bc) +endif + # ASN.1 grammar # --------------------------------------------------------------------------- quiet_cmd_asn1_compiler = ASN.1 $(basename $@).[ch] @@ -360,7 +387,7 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 
$(objtree)/scripts/asn1_compiler # --------------------------------------------------------------------------- # To build objects in subdirs, we need to descend into the directories -$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; +$(subdir-builtin): $(obj)/%/built-in$(builtin_suffix): $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; # @@ -377,6 +404,12 @@ quiet_cmd_ar_builtin = AR $@ $(obj)/built-in.a: $(real-obj-y) FORCE $(call if_changed,ar_builtin) +ifdef CONFIG_LTO_CLANG_THIN_DIST +# Rule to compile a set of .o_thinlto_native files into one .a_thinlto_native file. +$(obj)/built-in.a_thinlto_native: $(patsubst %.o,%.o_thinlto_native,$(real-obj-y)) FORCE + $(call if_changed,ar_builtin) +endif + # This is a list of build artifacts from the current Makefile and its # sub-directories. The timestamp should be updated when any of the member files. @@ -394,6 +427,14 @@ $(obj)/modules.order: $(obj-m) FORCE $(obj)/lib.a: $(lib-y) FORCE $(call if_changed,ar) +ifdef CONFIG_LTO_CLANG_THIN_DIST +quiet_cmd_ar_native = AR $@ + cmd_ar_native = rm -f $@; $(AR) cDPrsT $@ $(patsubst %.o,%.o_thinlto_native,$(real-prereqs)) + +$(obj)/lib.a_thinlto_native: $(patsubst %.o,%.o_thinlto_native,$(lib-y)) FORCE + $(call if_changed,ar_native) +endif + quiet_cmd_ld_multi_m = LD [M] $@ cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$< $(cmd_objtool) @@ -459,7 +500,8 @@ $(single-subdir-goals): $(single-subdirs) PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(build)=$@ \ - need-builtin=$(if $(filter $@/built-in.a, $(subdir-builtin)),1) \ + need-builtin=$(if $(filter $@/built-in$(builtin_suffix), $(subdir-builtin)),1) \ + thinlto_final_pass=$(if $(filter .a_thinlto_native, $(builtin_suffix)),1) \ need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 8956587b85470a..f4fcc1eaaeaee8 100644 --- a/scripts/Makefile.compiler +++ 
b/scripts/Makefile.compiler @@ -79,8 +79,8 @@ ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) # Usage: MY_RUSTFLAGS += $(call __rustc-option,$(RUSTC),$(MY_RUSTFLAGS),-Cinstrument-coverage,-Zinstrument-coverage) # TODO: remove RUSTC_BOOTSTRAP=1 when we raise the minimum GNU Make version to 4.4 __rustc-option = $(call try-run,\ - echo '#![allow(missing_docs)]#![feature(no_core)]#![no_core]' | RUSTC_BOOTSTRAP=1\ - $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null,$(2)) $(3)\ + echo '$(pound)![allow(missing_docs)]$(pound)![feature(no_core)]$(pound)![no_core]' | RUSTC_BOOTSTRAP=1\ + $(1) --sysroot=/dev/null $(filter-out --sysroot=/dev/null --target=%,$(2)) $(3)\ --crate-type=rlib --out-dir=$(TMPOUT) --emit=obj=- - >/dev/null,$(3),$(4)) # rustc-option diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index d75897559d1840..fd649c68e198ba 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -8,6 +8,7 @@ # Default set of warnings, always enabled KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -Werror=implicit-function-declaration KBUILD_CFLAGS += -Werror=implicit-int @@ -15,7 +16,7 @@ KBUILD_CFLAGS += -Werror=return-type KBUILD_CFLAGS += -Werror=strict-prototypes KBUILD_CFLAGS += -Wno-format-security KBUILD_CFLAGS += -Wno-trigraphs -KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-disable-warning, frame-address) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += -Wmissing-declarations KBUILD_CFLAGS += -Wmissing-prototypes @@ -36,6 +37,18 @@ KBUILD_CFLAGS += -Wno-gnu # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) + +# Clang may emit a warning when a const variable, such as the dummy variables +# in typecheck(), or const member of 
an aggregate type are not initialized, +# which can result in unexpected behavior. However, in many audited cases of +# the "field" variant of the warning, this is intentional because the field is +# never used within a particular call path, the field is within a union with +# other non-const members, or the containing object is not const so the field +# can be modified via memcpy() / memset(). While the variable warning also gets +# disabled with this same switch, there should not be too much coverage lost +# because -Wuninitialized will still flag when an uninitialized const variable +# is used. +KBUILD_CFLAGS += $(call cc-disable-warning, default-const-init-unsafe) else # gcc inanely warns about local variables called 'main' @@ -56,6 +69,13 @@ KBUILD_CFLAGS += -Wno-pointer-sign # globally built with -Wcast-function-type. KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-disable-warning, stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-disable-warning, unterminated-string-initialization) + # The allocators already balk at large sizes, so silence the compiler # warnings for bounds checks involving those possible values. 
While # -Wno-alloc-size-larger-than would normally be used here, earlier versions @@ -82,7 +102,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # Warn if there is an enum types mismatch KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) -KBUILD_CFLAGS += -Wextra KBUILD_CFLAGS += -Wunused # diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 2fe73cda0bddb9..9cfd23590334df 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -34,8 +34,13 @@ else obj-m := $(filter-out %/, $(obj-m)) endif +builtin_suffix := $(if $(filter %.a_thinlto_native, $(MAKECMDGOALS)),.a_thinlto_native,.a) +ifeq ($(thinlto_final_pass),1) + builtin_suffix :=.a_thinlto_native +endif + ifdef need-builtin -obj-y := $(patsubst %/, %/built-in.a, $(obj-y)) +obj-y := $(patsubst %/, %/built-in$(builtin_suffix), $(obj-y)) else obj-y := $(filter-out %/, $(obj-y)) endif diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index b0a6cd5b818c9f..b64862dc6f08d4 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -13,7 +13,7 @@ ifdef CONFIG_ARCH_VMLINUX_NEEDS_RELOCS vmlinux-final := vmlinux.unstripped quiet_cmd_strip_relocs = RSTRIP $@ - cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' $< $@ + cmd_strip_relocs = $(OBJCOPY) --remove-section='.rel*' --remove-section=!'.rel*.dyn' $< $@ vmlinux: $(vmlinux-final) FORCE $(call if_changed,strip_relocs) @@ -94,10 +94,10 @@ $(vmlinux-final): $(RESOLVE_BTFIDS) endif ifdef CONFIG_BUILDTIME_TABLE_SORT -vmlinux: scripts/sorttable +$(vmlinux-final): scripts/sorttable endif -# module.builtin.ranges +# modules.builtin.ranges # --------------------------------------------------------------------------- ifdef CONFIG_BUILTIN_MODULE_RANGES __default: modules.builtin.ranges diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 938c7457717ea0..f9abc45a68b363 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -9,6 +9,14 @@ include 
$(srctree)/scripts/Kbuild.include # for objtool include $(srctree)/scripts/Makefile.lib +ifeq ($(thinlto_final_pass),1) +vmlinux_a := vmlinux.a_thinlto_native +vmlinux_libs := $(patsubst %.a,%.a_thinlto_native,$(KBUILD_VMLINUX_LIBS)) +else +vmlinux_a := vmlinux.a +vmlinux_libs := $(KBUILD_VMLINUX_LIBS) +endif + # Generate a linker script to ensure correct ordering of initcalls for Clang LTO # --------------------------------------------------------------------------- @@ -18,7 +26,7 @@ quiet_cmd_gen_initcalls_lds = GEN $@ $(PERL) $(real-prereqs) > $@ .tmp_initcalls.lds: $(srctree)/scripts/generate_initcall_order.pl \ - vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE + $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed,gen_initcalls_lds) targets := .tmp_initcalls.lds @@ -59,8 +67,8 @@ quiet_cmd_ld_vmlinux.o = LD $@ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ - --whole-archive vmlinux.a --no-whole-archive \ - --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ + --whole-archive $(vmlinux_a) --no-whole-archive \ + --start-group $(vmlinux_libs) --end-group \ $(cmd_objtool) define rule_ld_vmlinux.o @@ -68,12 +76,12 @@ define rule_ld_vmlinux.o $(call cmd,gen_objtooldep) endef -vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE +vmlinux.o: $(initcalls-lds) $(vmlinux_a) $(vmlinux_libs) FORCE $(call if_changed_rule,ld_vmlinux.o) targets += vmlinux.o -# module.builtin.modinfo +# modules.builtin.modinfo # --------------------------------------------------------------------------- OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary @@ -82,7 +90,7 @@ targets += modules.builtin.modinfo modules.builtin.modinfo: vmlinux.o FORCE $(call if_changed,objcopy) -# module.builtin +# modules.builtin # --------------------------------------------------------------------------- # The second line aids cases where multiple modules share the same object. 
diff --git a/scripts/Makefile.vmlinux_thinlink b/scripts/Makefile.vmlinux_thinlink new file mode 100644 index 00000000000000..13e4026c7d45bd --- /dev/null +++ b/scripts/Makefile.vmlinux_thinlink @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0-only + +PHONY := __default +__default: vmlinux.thinlink + +include include/config/auto.conf +include $(srctree)/scripts/Kbuild.include + + +# Generate a linker script to ensure correct ordering of initcalls for Clang LTO +# --------------------------------------------------------------------------- + +quiet_cmd_gen_initcalls_lds = GEN $@ + cmd_gen_initcalls_lds = \ + $(PYTHON3) $(srctree)/scripts/jobserver-exec \ + $(PERL) $(real-prereqs) > $@ + +.tmp_initcalls_thinlink.lds: $(srctree)/scripts/generate_initcall_order.pl \ + vmlinux.a FORCE + $(call if_changed,gen_initcalls_lds) + +targets := .tmp_initcalls_thinlink.lds + +initcalls-lds := .tmp_initcalls_thinlink.lds + +quiet_cmd_ld_vmlinux.thinlink = LD $@ + cmd_ld_vmlinux.thinlink = \ + $(AR) t vmlinux.a > .vmlinux_thinlto_bc_files; \ + $(LD) ${KBUILD_LDFLAGS} -r $(addprefix -T , $(initcalls-lds)) \ + --thinlto-index-only @.vmlinux_thinlto_bc_files; \ + touch vmlinux.thinlink + +vmlinux.thinlink: vmlinux.a $(initcalls-lds) FORCE + $(call if_changed,ld_vmlinux.thinlink) + +targets += vmlinux.thinlink + +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. +# --------------------------------------------------------------------------- + +PHONY += FORCE +FORCE: + +# Read all saved command lines and dependencies for the $(targets) we +# may be building above, using $(if_changed{,_dep}). As an +# optimization, we don't need to read them if the target does not +# exist, we will rebuild anyway in that case. 
+ +existing-targets := $(wildcard $(sort $(targets))) + +-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) + +.PHONY: $(PHONY) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 3222c1070444fa..ef12c8f929eda3 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -123,6 +123,38 @@ static inline tree build_const_char_string(int len, const char *str) return cstr; } +static inline void __add_type_attr(tree type, const char *attr, tree args) +{ + tree oldattr; + + if (type == NULL_TREE) + return; + oldattr = lookup_attribute(attr, TYPE_ATTRIBUTES(type)); + if (oldattr != NULL_TREE) { + gcc_assert(TREE_VALUE(oldattr) == args || TREE_VALUE(TREE_VALUE(oldattr)) == TREE_VALUE(args)); + return; + } + + TYPE_ATTRIBUTES(type) = copy_list(TYPE_ATTRIBUTES(type)); + TYPE_ATTRIBUTES(type) = tree_cons(get_identifier(attr), args, TYPE_ATTRIBUTES(type)); +} + +static inline void add_type_attr(tree type, const char *attr, tree args) +{ + tree main_variant = TYPE_MAIN_VARIANT(type); + + __add_type_attr(TYPE_CANONICAL(type), attr, args); + __add_type_attr(TYPE_CANONICAL(main_variant), attr, args); + __add_type_attr(main_variant, attr, args); + + for (type = TYPE_NEXT_VARIANT(main_variant); type; type = TYPE_NEXT_VARIANT(type)) { + if (!lookup_attribute(attr, TYPE_ATTRIBUTES(type))) + TYPE_ATTRIBUTES(type) = TYPE_ATTRIBUTES(main_variant); + + __add_type_attr(TYPE_CANONICAL(type), attr, args); + } +} + #define PASS_INFO(NAME, REF, ID, POS) \ struct register_pass_info NAME##_pass_info = { \ .pass = make_##NAME##_pass(), \ diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 5694df3da2e95b..ff65a4f87f240a 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -73,6 +73,9 @@ static tree handle_randomize_layout_attr(tree *node, tree name, tree args, int f if (TYPE_P(*node)) { type = 
*node; + } else if (TREE_CODE(*node) == FIELD_DECL) { + *no_add_attrs = false; + return NULL_TREE; } else { gcc_assert(TREE_CODE(*node) == TYPE_DECL); type = TREE_TYPE(*node); @@ -344,35 +347,18 @@ static int relayout_struct(tree type) shuffle(type, (tree *)newtree, shuffle_length); - /* - * set up a bogus anonymous struct field designed to error out on unnamed struct initializers - * as gcc provides no other way to detect such code - */ - list = make_node(FIELD_DECL); - TREE_CHAIN(list) = newtree[0]; - TREE_TYPE(list) = void_type_node; - DECL_SIZE(list) = bitsize_zero_node; - DECL_NONADDRESSABLE_P(list) = 1; - DECL_FIELD_BIT_OFFSET(list) = bitsize_zero_node; - DECL_SIZE_UNIT(list) = size_zero_node; - DECL_FIELD_OFFSET(list) = size_zero_node; - DECL_CONTEXT(list) = type; - // to satisfy the constify plugin - TREE_READONLY(list) = 1; - for (i = 0; i < num_fields - 1; i++) TREE_CHAIN(newtree[i]) = newtree[i+1]; TREE_CHAIN(newtree[num_fields - 1]) = NULL_TREE; + add_type_attr(type, "randomize_performed", NULL_TREE); + add_type_attr(type, "designated_init", NULL_TREE); + if (has_flexarray) + add_type_attr(type, "has_flexarray", NULL_TREE); + main_variant = TYPE_MAIN_VARIANT(type); - for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) { - TYPE_FIELDS(variant) = list; - TYPE_ATTRIBUTES(variant) = copy_list(TYPE_ATTRIBUTES(variant)); - TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("randomize_performed"), NULL_TREE, TYPE_ATTRIBUTES(variant)); - TYPE_ATTRIBUTES(variant) = tree_cons(get_identifier("designated_init"), NULL_TREE, TYPE_ATTRIBUTES(variant)); - if (has_flexarray) - TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("has_flexarray"), NULL_TREE, TYPE_ATTRIBUTES(type)); - } + for (variant = main_variant; variant; variant = TYPE_NEXT_VARIANT(variant)) + TYPE_FIELDS(variant) = newtree[0]; /* * force a re-layout of the main variant @@ -440,10 +426,8 @@ static void randomize_type(tree type) if (lookup_attribute("randomize_layout", 
TYPE_ATTRIBUTES(TYPE_MAIN_VARIANT(type))) || is_pure_ops_struct(type)) relayout_struct(type); - for (variant = TYPE_MAIN_VARIANT(type); variant; variant = TYPE_NEXT_VARIANT(variant)) { - TYPE_ATTRIBUTES(type) = copy_list(TYPE_ATTRIBUTES(type)); - TYPE_ATTRIBUTES(type) = tree_cons(get_identifier("randomize_considered"), NULL_TREE, TYPE_ATTRIBUTES(type)); - } + add_type_attr(type, "randomize_considered", NULL_TREE); + #ifdef __DEBUG_PLUGIN fprintf(stderr, "Marking randomize_considered on struct %s\n", ORIG_TYPE_NAME(type)); #ifdef __DEBUG_VERBOSE diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index cd41bc906fbd61..7c3ea2b55041f8 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -19,7 +19,7 @@ def args_crates_cfgs(cfgs): return crates_cfgs -def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): +def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edition): # Generate the configuration list. 
cfg = [] with open(objtree / "include" / "generated" / "rustc_cfg") as fd: @@ -35,7 +35,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): crates_indexes = {} crates_cfgs = args_crates_cfgs(cfgs) - def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False): + def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=True, is_proc_macro=False, edition="2021"): crate = { "display_name": display_name, "root_module": str(root_module), @@ -43,7 +43,7 @@ def append_crate(display_name, root_module, deps, cfg=[], is_workspace_member=Tr "is_proc_macro": is_proc_macro, "deps": [{"crate": crates_indexes[dep], "name": dep} for dep in deps], "cfg": cfg, - "edition": "2021", + "edition": edition, "env": { "RUST_MODFILE": "This is only for rust-analyzer" } @@ -61,6 +61,7 @@ def append_sysroot_crate( display_name, deps, cfg=[], + edition="2021", ): append_crate( display_name, @@ -68,12 +69,13 @@ def append_sysroot_crate( deps, cfg, is_workspace_member=False, + edition=edition, ) # NB: sysroot crates reexport items from one another so setting up our transitive dependencies # here is important for ensuring that rust-analyzer can resolve symbols. The sources of truth # for this dependency graph are `(sysroot_src / crate / "Cargo.toml" for crate in crates)`. 
- append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", [])) + append_sysroot_crate("core", [], cfg=crates_cfgs.get("core", []), edition=core_edition) append_sysroot_crate("alloc", ["core"]) append_sysroot_crate("std", ["alloc", "core"]) append_sysroot_crate("proc_macro", ["core", "std"]) @@ -112,6 +114,12 @@ def append_sysroot_crate( cfg=["kernel"], ) + append_crate( + "ffi", + srctree / "rust" / "ffi.rs", + ["core", "compiler_builtins"], + ) + def append_crate_with_generated( display_name, deps, @@ -131,9 +139,9 @@ def append_crate_with_generated( "exclude_dirs": [], } - append_crate_with_generated("bindings", ["core"]) - append_crate_with_generated("uapi", ["core"]) - append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin_init", "bindings", "uapi"]) + append_crate_with_generated("bindings", ["core", "ffi"]) + append_crate_with_generated("uapi", ["core", "ffi"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin_init", "ffi", "bindings", "uapi"]) def is_root_crate(build_file, target): try: @@ -171,6 +179,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument('--verbose', '-v', action='store_true') parser.add_argument('--cfgs', action='append', default=[]) + parser.add_argument("core_edition") parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) parser.add_argument("sysroot", type=pathlib.Path) @@ -187,7 +196,7 @@ def main(): assert args.sysroot in args.sysroot_src.parents rust_project = { - "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), + "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs, args.core_edition), "sysroot": str(args.sysroot), } diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c index 8b0d7ac73dbb09..83e48670c2fcfb 100644 --- a/scripts/genksyms/genksyms.c +++ b/scripts/genksyms/genksyms.c @@ -181,13 +181,9 @@ static 
int is_unknown_symbol(struct symbol *sym) strcmp(defn->string, "{") == 0); } -static struct symbol *__add_symbol(const char *name, enum symbol_type type, - struct string_list *defn, int is_extern, - int is_reference) +static struct string_list *process_enum(const char *name, enum symbol_type type, + struct string_list *defn) { - unsigned long h; - struct symbol *sym; - enum symbol_status status = STATUS_UNCHANGED; /* The parser adds symbols in the order their declaration completes, * so it is safe to store the value of the previous enum constant in * a static variable. @@ -216,7 +212,7 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, defn = mk_node(buf); } } - } else if (type == SYM_ENUM) { + } else { free_list(last_enum_expr, NULL); last_enum_expr = NULL; enum_counter = 0; @@ -225,6 +221,23 @@ static struct symbol *__add_symbol(const char *name, enum symbol_type type, return NULL; } + return defn; +} + +static struct symbol *__add_symbol(const char *name, enum symbol_type type, + struct string_list *defn, int is_extern, + int is_reference) +{ + unsigned long h; + struct symbol *sym; + enum symbol_status status = STATUS_UNCHANGED; + + if ((type == SYM_ENUM_CONST || type == SYM_ENUM) && !is_reference) { + defn = process_enum(name, type, defn); + if (defn == NULL) + return NULL; + } + h = crc32(name); hash_for_each_possible(symbol_hashtable, sym, hnode, h) { if (map_to_ns(sym->type) != map_to_ns(type) || diff --git a/scripts/genksyms/keywords.c b/scripts/genksyms/keywords.c index b85e0979a00cec..ee1499d2706176 100644 --- a/scripts/genksyms/keywords.c +++ b/scripts/genksyms/keywords.c @@ -17,6 +17,8 @@ static struct resword { { "__signed__", SIGNED_KEYW }, { "__typeof", TYPEOF_KEYW }, { "__typeof__", TYPEOF_KEYW }, + { "__typeof_unqual", TYPEOF_KEYW }, + { "__typeof_unqual__", TYPEOF_KEYW }, { "__volatile", VOLATILE_KEYW }, { "__volatile__", VOLATILE_KEYW }, { "__builtin_va_list", VA_LIST_KEYW }, @@ -40,6 +42,10 @@ static struct resword { 
// KAO. }, // { "attribute", ATTRIBUTE_KEYW }, + // X86 named address space qualifiers + { "__seg_gs", X86_SEG_KEYW }, + { "__seg_fs", X86_SEG_KEYW }, + { "auto", AUTO_KEYW }, { "char", CHAR_KEYW }, { "const", CONST_KEYW }, @@ -57,6 +63,7 @@ static struct resword { { "struct", STRUCT_KEYW }, { "typedef", TYPEDEF_KEYW }, { "typeof", TYPEOF_KEYW }, + { "typeof_unqual", TYPEOF_KEYW }, { "union", UNION_KEYW }, { "unsigned", UNSIGNED_KEYW }, { "void", VOID_KEYW }, diff --git a/scripts/genksyms/parse.y b/scripts/genksyms/parse.y index ee600a804fa103..efdcf07c4eb6da 100644 --- a/scripts/genksyms/parse.y +++ b/scripts/genksyms/parse.y @@ -91,6 +91,8 @@ static void record_compound(struct string_list **keyw, %token TYPEOF_KEYW %token VA_LIST_KEYW +%token X86_SEG_KEYW + %token EXPORT_SYMBOL_KEYW %token ASM_PHRASE @@ -292,7 +294,8 @@ type_qualifier_seq: ; type_qualifier: - CONST_KEYW | VOLATILE_KEYW + X86_SEG_KEYW + | CONST_KEYW | VOLATILE_KEYW | RESTRICT_KEYW { /* restrict has no effect in prototypes so ignore it */ remove_node($1); diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt index 7274dfc65af606..90710b87a38779 100644 --- a/scripts/head-object-list.txt +++ b/scripts/head-object-list.txt @@ -18,6 +18,7 @@ arch/arm/kernel/head.o arch/csky/kernel/head.o arch/hexagon/kernel/head.o arch/loongarch/kernel/head.o +arch/loongarch/kernel/head.o_thinlto_native arch/m68k/68000/head.o arch/m68k/coldfire/head.o arch/m68k/kernel/head.o diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 726f34e1196018..98f206cb7c6079 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -16,6 +16,7 @@ Source1: config Source2: diff.patch Provides: kernel-%{KERNELRELEASE} BuildRequires: bc binutils bison dwarves +BuildRequires: (elfutils-devel or libdw-devel) BuildRequires: (elfutils-libelf-devel or libelf-devel) flex BuildRequires: gcc make openssl openssl-devel perl python3 rsync diff --git a/scripts/package/mkdebian 
b/scripts/package/mkdebian index 744ddba01d93f9..d4b007b38a4759 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -210,7 +210,7 @@ Rules-Requires-Root: no Build-Depends: debhelper-compat (= 12) Build-Depends-Arch: bc, bison, flex, gcc-${host_gnu} , - kmod, libelf-dev:native, + kmod, libdw-dev:native, libelf-dev:native, libssl-dev:native, libssl-dev , python3:native, rsync Homepage: https://www.kernel.org/ diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index c17366ce8224ef..3fe9d7b945c43e 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -344,7 +344,7 @@ config CC_HAS_RANDSTRUCT choice prompt "Randomize layout of sensitive kernel structures" - default RANDSTRUCT_FULL if COMPILE_TEST && (GCC_PLUGINS || CC_HAS_RANDSTRUCT) + default RANDSTRUCT_FULL if COMPILE_TEST && CC_HAS_RANDSTRUCT default RANDSTRUCT_NONE help If you enable this, the layouts of structures that are entirely diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f3e7ac513db3f5..f99ab1a3b0f092 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -245,7 +245,9 @@ static int process_measurement(struct file *file, const struct cred *cred, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && - (ima_policy_flag & IMA_MEASURE)); + (ima_policy_flag & IMA_MEASURE) && + ((action & IMA_MEASURE) || + (file->f_mode & FMODE_WRITE))); if (!action && !violation_check) return 0; diff --git a/security/landlock/audit.c b/security/landlock/audit.c index 7e5e0ed0e4e5fe..c52d079cdb77ba 100644 --- a/security/landlock/audit.c +++ b/security/landlock/audit.c @@ -175,7 +175,7 @@ static void test_get_hierarchy(struct kunit *const test) KUNIT_EXPECT_EQ(test, 10, get_hierarchy(&dom2, 0)->id); KUNIT_EXPECT_EQ(test, 20, get_hierarchy(&dom2, 1)->id); KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, 2)->id); - KUNIT_EXPECT_EQ(test, 30, 
get_hierarchy(&dom2, -1)->id); + /* KUNIT_EXPECT_EQ(test, 30, get_hierarchy(&dom2, -1)->id); */ } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ @@ -437,7 +437,7 @@ void landlock_log_denial(const struct landlock_cred_security *const subject, return; /* Checks if the current exec was restricting itself. */ - if (subject->domain_exec & (1 << youngest_layer)) { + if (subject->domain_exec & BIT(youngest_layer)) { /* Ignores denials for the same execution. */ if (!youngest_denied->log_same_exec) return; diff --git a/security/landlock/domain.c b/security/landlock/domain.c index bae2e99090131f..a647b68e8d060a 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "access.h" @@ -99,8 +100,7 @@ static struct landlock_details *get_current_details(void) return ERR_PTR(-ENOMEM); memcpy(details->exe_path, path_str, path_size); - WARN_ON_ONCE(current_cred() != current_real_cred()); - details->pid = get_pid(task_pid(current)); + details->pid = get_pid(task_tgid(current)); details->uid = from_kuid(&init_user_ns, current_uid()); get_task_comm(details->comm, current); return details; diff --git a/security/landlock/domain.h b/security/landlock/domain.h index ed0d348e214c96..7fb70b25f85a19 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -130,7 +130,7 @@ int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); static inline void landlock_free_hierarchy_details(struct landlock_hierarchy *const hierarchy) { - if (WARN_ON_ONCE(!hierarchy || !hierarchy->details)) + if (!hierarchy || !hierarchy->details) return; put_pid(hierarchy->details->pid); diff --git a/security/landlock/id.c b/security/landlock/id.c index 11fab9259c157f..56f7cc0fc7440f 100644 --- a/security/landlock/id.c +++ b/security/landlock/id.c @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -25,7 +26,7 @@ static void __init init_id(atomic64_t *const counter, const u32 
random_32bits) * Ensures sure 64-bit values are always used by user space (or may * fail with -EOVERFLOW), and makes this testable. */ - init = 1ULL << 32; + init = BIT_ULL(32); /* * Makes a large (2^32) boot-time value to limit ID collision in logs @@ -105,7 +106,7 @@ static u64 get_id_range(size_t number_of_ids, atomic64_t *const counter, * to get a new ID (e.g. a full landlock_restrict_self() call), and the * cost of draining all available IDs during the system's uptime. */ - random_4bits = random_4bits % (1 << 4); + random_4bits &= 0b1111; step = number_of_ids + random_4bits; /* It is safe to cast a signed atomic to an unsigned value. */ @@ -144,6 +145,19 @@ static void test_range1_rand1(struct kunit *const test) init + 2); } +static void test_range1_rand15(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 15), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 16); +} + static void test_range1_rand16(struct kunit *const test) { atomic64_t counter; @@ -196,6 +210,19 @@ static void test_range2_rand2(struct kunit *const test) init + 4); } +static void test_range2_rand15(struct kunit *const test) +{ + atomic64_t counter; + u64 init; + + init = get_random_u32(); + atomic64_set(&counter, init); + KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 15), init); + KUNIT_EXPECT_EQ( + test, get_id_range(get_random_u8(), &counter, get_random_u8()), + init + 17); +} + static void test_range2_rand16(struct kunit *const test) { atomic64_t counter; @@ -232,10 +259,12 @@ static struct kunit_case __refdata test_cases[] = { KUNIT_CASE(test_init_once), KUNIT_CASE(test_range1_rand0), KUNIT_CASE(test_range1_rand1), + KUNIT_CASE(test_range1_rand15), KUNIT_CASE(test_range1_rand16), KUNIT_CASE(test_range2_rand0), KUNIT_CASE(test_range2_rand1), KUNIT_CASE(test_range2_rand2), + KUNIT_CASE(test_range2_rand15), 
KUNIT_CASE(test_range2_rand16), {} /* clang-format on */ diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 54a9f29e6ebb11..33eafb71e4f31b 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -169,20 +170,16 @@ const int landlock_abi_version = 7; * the new ruleset. * @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). - * @flags: Supported value: + * @flags: Supported values: + * * - %LANDLOCK_CREATE_RULESET_VERSION * - %LANDLOCK_CREATE_RULESET_ERRATA * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. * - * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is - * 0, then the returned value is the highest supported Landlock ABI version - * (starting at 1). - * - * If @flags is %LANDLOCK_CREATE_RULESET_ERRATA and @attr is NULL and @size is - * 0, then the returned value is a bitmask of fixed issues for the current - * Landlock ABI version. + * If %LANDLOCK_CREATE_RULESET_VERSION or %LANDLOCK_CREATE_RULESET_ERRATA is + * set, then @attr must be NULL and @size must be 0. * * Possible returned errors are: * @@ -191,6 +188,9 @@ const int landlock_abi_version = 7; * - %E2BIG: @attr or @size inconsistencies; * - %EFAULT: @attr or @size inconsistencies; * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. + * + * .. kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_create_ruleset_flags */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, @@ -452,18 +452,15 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * @ruleset_fd: File descriptor tied to the ruleset to merge with the target. 
* @flags: Supported values: * - * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF - * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON - * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF + * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON + * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF * * This system call enables to enforce a Landlock ruleset on the current * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * - * It is allowed to only pass the %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF - * flag with a @ruleset_fd value of -1. - * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; @@ -475,6 +472,9 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * %CAP_SYS_ADMIN in its namespace. * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. + * + * .. 
kernel-doc:: include/uapi/linux/landlock.h + * :identifiers: landlock_restrict_self_flags */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) @@ -564,7 +564,7 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, new_llcred->domain = new_dom; #ifdef CONFIG_AUDIT - new_llcred->domain_exec |= 1 << (new_dom->num_layers - 1); + new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); #endif /* CONFIG_AUDIT */ return commit_creds(new_cred); diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 4683b9139c566a..4ecb17bd5436e7 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1074,8 +1074,7 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) runtime->oss.params = 0; runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; - if (runtime->dma_area) - snd_pcm_format_set_silence(runtime->format, runtime->dma_area, bytes_to_samples(runtime, runtime->dma_bytes)); + snd_pcm_runtime_buffer_set_silence(runtime); runtime->oss.period_frames = snd_pcm_alsa_frames(substream, oss_period_size); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 6c2b6a62d9d2f8..853ac5bb33ff2a 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -723,6 +723,17 @@ static void snd_pcm_buffer_access_unlock(struct snd_pcm_runtime *runtime) atomic_inc(&runtime->buffer_accessing); } +/* fill the PCM buffer with the current silence format; called from pcm_oss.c */ +void snd_pcm_runtime_buffer_set_silence(struct snd_pcm_runtime *runtime) +{ + snd_pcm_buffer_access_lock(runtime); + if (runtime->dma_area) + snd_pcm_format_set_silence(runtime->format, runtime->dma_area, + bytes_to_samples(runtime, runtime->dma_bytes)); + snd_pcm_buffer_access_unlock(runtime); +} +EXPORT_SYMBOL_GPL(snd_pcm_runtime_buffer_set_silence); + #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define is_oss_stream(substream) ((substream)->oss.oss) #else diff --git 
a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 198c598a539398..880240924bfd05 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -732,15 +732,21 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client, */ static int __deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, - struct snd_seq_client_port *src_port, - int atomic, int hop) + int port, int atomic, int hop) { + struct snd_seq_client_port *src_port; struct snd_seq_subscribers *subs; int err, result = 0, num_ev = 0; union __snd_seq_event event_saved; size_t saved_size; struct snd_seq_port_subs_info *grp; + if (port < 0) + return 0; + src_port = snd_seq_port_use_ptr(client, port); + if (!src_port) + return 0; + /* save original event record */ saved_size = snd_seq_event_packet_size(event); memcpy(&event_saved, event, saved_size); @@ -775,6 +781,7 @@ static int __deliver_to_subscribers(struct snd_seq_client *client, read_unlock(&grp->list_lock); else up_read(&grp->list_mutex); + snd_seq_port_unlock(src_port); memcpy(event, &event_saved, saved_size); return (result < 0) ? 
result : num_ev; } @@ -783,25 +790,32 @@ static int deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { - struct snd_seq_client_port *src_port; - int ret = 0, ret2; - - src_port = snd_seq_port_use_ptr(client, event->source.port); - if (src_port) { - ret = __deliver_to_subscribers(client, event, src_port, atomic, hop); - snd_seq_port_unlock(src_port); - } - - if (client->ump_endpoint_port < 0 || - event->source.port == client->ump_endpoint_port) - return ret; + int ret; +#if IS_ENABLED(CONFIG_SND_SEQ_UMP) + int ret2; +#endif - src_port = snd_seq_port_use_ptr(client, client->ump_endpoint_port); - if (!src_port) + ret = __deliver_to_subscribers(client, event, + event->source.port, atomic, hop); +#if IS_ENABLED(CONFIG_SND_SEQ_UMP) + if (!snd_seq_client_is_ump(client) || client->ump_endpoint_port < 0) return ret; - ret2 = __deliver_to_subscribers(client, event, src_port, atomic, hop); - snd_seq_port_unlock(src_port); - return ret2 < 0 ? ret2 : ret; + /* If it's an event from EP port (and with a UMP group), + * deliver to subscribers of the corresponding UMP group port, too. + * Or, if it's from non-EP port, deliver to subscribers of EP port, too. + */ + if (event->source.port == client->ump_endpoint_port) + ret2 = __deliver_to_subscribers(client, event, + snd_seq_ump_group_port(event), + atomic, hop); + else + ret2 = __deliver_to_subscribers(client, event, + client->ump_endpoint_port, + atomic, hop); + if (ret2 < 0) + return ret2; +#endif + return ret; } /* deliver an event to the destination port(s). 
diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index ff7e558b4d51d0..db2f169cae11ea 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -1285,3 +1285,21 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, else return cvt_to_ump_midi1(dest, dest_port, event, atomic, hop); } + +/* return the UMP group-port number of the event; + * return -1 if groupless or non-UMP event + */ +int snd_seq_ump_group_port(const struct snd_seq_event *event) +{ + const struct snd_seq_ump_event *ump_ev = + (const struct snd_seq_ump_event *)event; + unsigned char type; + + if (!snd_seq_ev_is_ump(event)) + return -1; + type = ump_message_type(ump_ev->ump[0]); + if (ump_is_groupless_msg(type)) + return -1; + /* group-port number starts from 1 */ + return ump_message_group(ump_ev->ump[0]) + 1; +} diff --git a/sound/core/seq/seq_ump_convert.h b/sound/core/seq/seq_ump_convert.h index 6c146d8032804f..4abf0a7637d701 100644 --- a/sound/core/seq/seq_ump_convert.h +++ b/sound/core/seq/seq_ump_convert.h @@ -18,5 +18,6 @@ int snd_seq_deliver_to_ump(struct snd_seq_client *source, struct snd_seq_client_port *dest_port, struct snd_seq_event *event, int atomic, int hop); +int snd_seq_ump_group_port(const struct snd_seq_event *event); #endif /* __SEQ_UMP_CONVERT_H */ diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c index 4492be5d2317c7..bac9f860373425 100644 --- a/sound/core/seq_device.c +++ b/sound/core/seq_device.c @@ -43,7 +43,7 @@ MODULE_LICENSE("GPL"); static int snd_seq_bus_match(struct device *dev, const struct device_driver *drv) { struct snd_seq_device *sdev = to_seq_dev(dev); - struct snd_seq_driver *sdrv = to_seq_drv(drv); + const struct snd_seq_driver *sdrv = to_seq_drv(drv); return strcmp(sdrv->id, sdev->id) == 0 && sdrv->argsize == sdev->argsize; diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 6199bb60ccf00f..c84754434d1627 100644 --- 
a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -9,6 +9,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include #include #include #include @@ -81,6 +82,7 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus) int idx; u32 link_count; struct hdac_ext_link *hlink; + u32 leptr; link_count = readl(bus->mlcap + AZX_REG_ML_MLCD) + 1; @@ -96,6 +98,12 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus) (AZX_ML_INTERVAL * idx); hlink->lcaps = readl(hlink->ml_addr + AZX_REG_ML_LCAP); hlink->lsdiid = readw(hlink->ml_addr + AZX_REG_ML_LSDIID); + hlink->slcount = FIELD_GET(AZX_ML_HDA_LCAP_SLCOUNT, hlink->lcaps) + 1; + + if (hdac_ext_link_alt(hlink)) { + leptr = readl(hlink->ml_addr + AZX_REG_ML_LEPTR); + hlink->id = FIELD_GET(AZX_REG_ML_LEPTR_ID, leptr); + } /* since link in On, update the ref */ hlink->ref_count = 1; @@ -125,6 +133,17 @@ void snd_hdac_ext_link_free_all(struct hdac_bus *bus) } EXPORT_SYMBOL_GPL(snd_hdac_ext_link_free_all); +struct hdac_ext_link *snd_hdac_ext_bus_get_hlink_by_id(struct hdac_bus *bus, u32 id) +{ + struct hdac_ext_link *hlink; + + list_for_each_entry(hlink, &bus->hlink_list, list) + if (hdac_ext_link_alt(hlink) && hlink->id == id) + return hlink; + return NULL; +} +EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_get_hlink_by_id); + /** * snd_hdac_ext_bus_get_hlink_by_addr - get hlink at specified address * @bus: hlink's parent bus device diff --git a/sound/hda/hda_bus_type.c b/sound/hda/hda_bus_type.c index 7545ace7b0ee4b..eb72a7af2e56e8 100644 --- a/sound/hda/hda_bus_type.c +++ b/sound/hda/hda_bus_type.c @@ -21,7 +21,7 @@ MODULE_LICENSE("GPL"); * driver id_table and returns the matching device id entry. 
*/ const struct hda_device_id * -hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv) +hdac_get_device_id(struct hdac_device *hdev, const struct hdac_driver *drv) { if (drv->id_table) { const struct hda_device_id *id = drv->id_table; @@ -38,7 +38,7 @@ hdac_get_device_id(struct hdac_device *hdev, struct hdac_driver *drv) } EXPORT_SYMBOL_GPL(hdac_get_device_id); -static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv) +static int hdac_codec_match(struct hdac_device *dev, const struct hdac_driver *drv) { if (hdac_get_device_id(dev, drv)) return 1; @@ -49,7 +49,7 @@ static int hdac_codec_match(struct hdac_device *dev, struct hdac_driver *drv) static int hda_bus_match(struct device *dev, const struct device_driver *drv) { struct hdac_device *hdev = dev_to_hdac_dev(dev); - struct hdac_driver *hdrv = drv_to_hdac_driver(drv); + const struct hdac_driver *hdrv = drv_to_hdac_driver(drv); if (hdev->type != hdrv->type) return 0; diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 8686adaf453103..d3511135f7d39f 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -177,7 +177,7 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, * sdw_intel_startup() is required for creation of devices and bus * startup */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info) { acpi_status status; diff --git a/sound/isa/azt2320.c b/sound/isa/azt2320.c index b937c9138d1248..588b9f0831d3fe 100644 --- a/sound/isa/azt2320.c +++ b/sound/isa/azt2320.c @@ -189,8 +189,8 @@ static int snd_card_azt2320_probe(int dev, if (error < 0) return error; - strcpy(card->driver, "AZT2320"); - strcpy(card->shortname, "Aztech AZT2320"); + strscpy(card->driver, "AZT2320"); + strscpy(card->shortname, "Aztech AZT2320"); sprintf(card->longname, "%s, WSS at 0x%lx, irq %i, dma %i&%i", card->shortname, chip->port, irq[dev], dma1[dev], 
dma2[dev]); diff --git a/sound/pci/es1968.c b/sound/pci/es1968.c index c6c018b40c69f9..4e0693f0ab0f89 100644 --- a/sound/pci/es1968.c +++ b/sound/pci/es1968.c @@ -1561,7 +1561,7 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct es1968 *chip = snd_pcm_substream_chip(substream); struct esschan *es; - int apu1, apu2; + int err, apu1, apu2; apu1 = snd_es1968_alloc_apu_pair(chip, ESM_APU_PCM_CAPTURE); if (apu1 < 0) @@ -1605,7 +1605,9 @@ static int snd_es1968_capture_open(struct snd_pcm_substream *substream) runtime->hw = snd_es1968_capture; runtime->hw.buffer_bytes_max = runtime->hw.period_bytes_max = calc_available_memory_size(chip) - 1024; /* keep MIXBUF size */ - snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES); + err = snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_BYTES); + if (err < 0) + return err; spin_lock_irq(&chip->substream_lock); list_add(&es->list, &chip->substream_list); diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index fb955a205d50ce..9c427270ff4f49 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -96,9 +96,7 @@ config SND_HDA_CIRRUS_SCODEC config SND_HDA_CIRRUS_SCODEC_KUNIT_TEST tristate "KUnit test for Cirrus side-codec library" if !KUNIT_ALL_TESTS - select SND_HDA_CIRRUS_SCODEC - select GPIOLIB - depends on KUNIT + depends on SND_HDA_CIRRUS_SCODEC && GPIOLIB && KUNIT default KUNIT_ALL_TESTS help This builds KUnit tests for the cirrus side-codec library. 
diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 9521e5e0e6e6f8..1fef350d821ef0 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -18,10 +18,10 @@ /* * find a matching codec id */ -static int hda_codec_match(struct hdac_device *dev, struct hdac_driver *drv) +static int hda_codec_match(struct hdac_device *dev, const struct hdac_driver *drv) { struct hda_codec *codec = container_of(dev, struct hda_codec, core); - struct hda_codec_driver *driver = + const struct hda_codec_driver *driver = container_of(drv, struct hda_codec_driver, core); const struct hda_device_id *list; /* check probe_id instead of vendor_id if set */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 79004bc8107bc2..20ab1fb2195ff6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -441,6 +441,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0236: + case 0x10ec0245: + case 0x10ec0256: + case 0x10ec0257: case 0x10ec0285: case 0x10ec0289: alc_update_coef_idx(codec, 0x36, 1<<13, 0); @@ -448,12 +452,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: - case 0x10ec0245: case 0x10ec0255: - case 0x10ec0256: case 0x19e58326: - case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: @@ -6742,6 +6742,25 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, codec->power_save_node = 0; } +/* avoid DAC 0x06 for speaker switch 0x17; it has no volume control */ +static void alc274_fixup_hp_aio_bind_dacs(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */ + /* The speaker is routed to the Node 0x06 by a mistake, thus the + * speaker's volume can't be adjusted since the node doesn't have + * Amp-out capability. 
Assure the speaker and lineout pin to be + * coupled with DAC NID 0x02. + */ + static const hda_nid_t preferred_pairs[] = { + 0x16, 0x02, 0x17, 0x02, 0x21, 0x03, 0 + }; + struct alc_spec *spec = codec->spec; + + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; +} + /* avoid DAC 0x06 for bass speaker 0x17; it has no volume control */ static void alc289_fixup_asus_ga401(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -6811,7 +6830,10 @@ static void alc256_fixup_chromebook(struct hda_codec *codec, switch (action) { case HDA_FIXUP_ACT_PRE_PROBE: - spec->gen.suppress_auto_mute = 1; + if (codec->core.subsystem_id == 0x10280d76) + spec->gen.suppress_auto_mute = 0; + else + spec->gen.suppress_auto_mute = 1; spec->gen.suppress_auto_mic = 1; spec->en_3kpull_low = false; break; @@ -6963,6 +6985,41 @@ static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off */ +static void alc285_fixup_hp_spectre_x360_df1(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + static const hda_nid_t conn[] = { 0x02 }; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + // enable mute led + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); 
+ break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7761,6 +7818,7 @@ enum { ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, ALC285_FIXUP_HP_SPECTRE_X360_EB1, + ALC285_FIXUP_HP_SPECTRE_X360_DF1, ALC285_FIXUP_HP_ENVY_X360, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -7969,6 +8027,8 @@ enum { ALC233_FIXUP_MEDION_MTL_SPK, ALC294_FIXUP_BASS_SPEAKER_15, ALC283_FIXUP_DELL_HP_RESUME, + ALC294_FIXUP_ASUS_CS35L41_SPI_2, + ALC274_FIXUP_HP_AIO_BIND_DACS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9836,6 +9896,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_eb1 }, + [ALC285_FIXUP_HP_SPECTRE_X360_DF1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_df1 + }, [ALC285_FIXUP_HP_ENVY_X360] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_envy_x360, @@ -10333,6 +10397,16 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc283_fixup_dell_hp_resume, }, + [ALC294_FIXUP_ASUS_CS35L41_SPI_2] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35l41_fixup_spi_two, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC, + }, + [ALC274_FIXUP_HP_AIO_BIND_DACS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc274_fixup_hp_aio_bind_dacs, + }, }; static const struct hda_quirk alc269_fixup_tbl[] = { @@ -10557,6 +10631,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c1, "HP Laptop 15-da3001TU", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x863e, "HP Spectre x360 15-df1xxx", ALC285_FIXUP_HP_SPECTRE_X360_DF1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86f9, 
"HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), @@ -10761,12 +10836,13 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8caf, "HP Elite mt645 G8 Mobile Thin Client", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8cbd, "HP Pavilion Aero Laptop 13-bg0xxx", ALC245_FIXUP_HP_X360_MUTE_LEDS), - SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), - SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), + SND_PCI_QUIRK(0x103c, 0x8cde, "HP OmniBook Ultra Flip Laptop 14t", ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX), SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d18, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 14 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite X360 14 G12", ALC285_FIXUP_HP_GPIO_LED), @@ -10786,11 +10862,15 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8da1, "HP 16 Clipper OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da7, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8da8, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8dd4, "HP EliteStudio 8 AIO", ALC274_FIXUP_HP_AIO_BIND_DACS), 
SND_PCI_QUIRK(0x103c, 0x8de8, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8de9, "HP Gemtree", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8dec, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ded, "HP EliteBook 640 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dee, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8def, "HP EliteBook 660 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8df0, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8df1, "HP EliteBook 630 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfc, "HP EliteBook 645 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8dfe, "HP EliteBook 665 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e11, "HP Trekker", ALC287_FIXUP_CS35L41_I2C_2), @@ -10805,9 +10885,12 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), SND_PCI_QUIRK(0x103c, 0x8e1b, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), SND_PCI_QUIRK(0x103c, 0x8e1c, "HP EliteBook G12", ALC245_FIXUP_HP_ZBOOK_FIREFLY_G12A), + SND_PCI_QUIRK(0x103c, 0x8e1d, "HP ZBook X Gli 16 G12", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e2c, "HP EliteBook 16 G12", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8e36, "HP 14 Enstrom OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e37, "HP 16 Piston OmniBook X", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e3a, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8e3b, "HP Agusta", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), @@ -10835,11 +10918,11 @@ static const struct hda_quirk 
alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12a3, "Asus N7691ZM", ALC269_FIXUP_ASUS_N7601ZM), SND_PCI_QUIRK(0x1043, 0x12af, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), - SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x12b4, "ASUS B3405CCA / P3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -10893,7 +10976,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), - SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1ccf, "ASUS G814JU/JV/JI", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cdf, "ASUS G814JY/JZ/JG", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1cef, "ASUS G834JY/JZ/JI/JG", ALC285_FIXUP_ASUS_HEADSET_MIC), 
@@ -10925,14 +11008,14 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1fb3, "ASUS ROG Flow Z13 GZ302EA", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x3011, "ASUS B5605CVA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), - SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3061, "ASUS B3405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3071, "ASUS B5405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30c1, "ASUS B3605CCA / P3605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30d1, "ASUS B5405CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x30e1, "ASUS B5605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x31d0, "ASUS Zen AIO 27 Z272SD_A272SD", ALC274_FIXUP_ASUS_ZEN_AIO_27), - SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x31e1, "ASUS B5605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x31f1, "ASUS B3605CCA", ALC294_FIXUP_ASUS_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS), @@ -11223,6 +11306,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), 
SND_PCI_QUIRK(0x17aa, 0x38fd, "ThinkBook plus Gen5 Hybrid", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x390d, "Lenovo Yoga Pro 7 14ASP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC), SND_PCI_QUIRK(0x17aa, 0x391f, "Yoga S990-16 pro Quad YC Quad", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x3920, "Yoga S990-16 pro Quad VECO Quad", ALC287_FIXUP_TAS2781_I2C), @@ -11487,6 +11571,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_DF1, .name = "alc285-hp-spectre-x360-df1"}, {.id = ALC285_FIXUP_HP_ENVY_X360, .name = "alc285-hp-envy-x360"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, diff --git a/sound/pci/hda/tas2781_hda_spi.c b/sound/pci/hda/tas2781_hda_spi.c index 399f2e4c3b62b4..25175ff4b3aaa5 100644 --- a/sound/pci/hda/tas2781_hda_spi.c +++ b/sound/pci/hda/tas2781_hda_spi.c @@ -1003,8 +1003,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) */ out: - if (fmw) - release_firmware(fmw); + release_firmware(fmw); pm_runtime_mark_last_busy(tas_hda->priv->dev); pm_runtime_put_autosuspend(tas_hda->priv->dev); } diff --git a/sound/sh/Kconfig b/sound/sh/Kconfig index b75fbb3236a7b9..f5fa09d740b4c9 100644 --- a/sound/sh/Kconfig +++ b/sound/sh/Kconfig @@ -14,7 +14,7 @@ if SND_SUPERH config SND_AICA tristate "Dreamcast Yamaha AICA sound" - depends on SH_DREAMCAST + depends on SH_DREAMCAST && SH_DMA_API select SND_PCM select G2_DMA help diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c index 
a38409dd1d3409..70fa54d568ef68 100644 --- a/sound/soc/amd/acp/acp-i2s.c +++ b/sound/soc/amd/acp/acp-i2s.c @@ -97,7 +97,7 @@ static int acp_i2s_set_tdm_slot(struct snd_soc_dai *dai, u32 tx_mask, u32 rx_mas struct acp_stream *stream; int slot_len, no_of_slots; - chip = dev_get_platdata(dev); + chip = dev_get_drvdata(dev->parent); switch (slot_width) { case SLOT_WIDTH_8: slot_len = 8; diff --git a/sound/soc/amd/acp/acp-legacy-common.c b/sound/soc/amd/acp/acp-legacy-common.c index b4d68484e06df8..ba8db0851daac4 100644 --- a/sound/soc/amd/acp/acp-legacy-common.c +++ b/sound/soc/amd/acp/acp-legacy-common.c @@ -450,7 +450,7 @@ int acp_machine_select(struct acp_chip_info *chip) struct snd_soc_acpi_mach *mach; int size, platform; - if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC) { + if (chip->flag == FLAG_AMD_LEGACY_ONLY_DMIC && chip->is_pdm_dev) { platform = chip->acp_rev; chip->mach_dev = platform_device_register_data(chip->dev, "acp-pdm-mach", PLATFORM_DEVID_NONE, &platform, diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c index 746b6ed7202965..cccdd10c345e6d 100644 --- a/sound/soc/amd/acp/acp-rembrandt.c +++ b/sound/soc/amd/acp/acp-rembrandt.c @@ -199,7 +199,7 @@ static void rembrandt_audio_remove(struct platform_device *pdev) static int rmb_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp-renoir.c b/sound/soc/amd/acp/acp-renoir.c index ebf0106fc73742..04f6d70b6a92d8 100644 --- a/sound/soc/amd/acp/acp-renoir.c +++ b/sound/soc/amd/acp/acp-renoir.c @@ -146,7 +146,7 @@ static void renoir_audio_remove(struct platform_device *pdev) static int rn_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; 
struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c index 52d895e624c723..1f15c96a9b9461 100644 --- a/sound/soc/amd/acp/acp63.c +++ b/sound/soc/amd/acp/acp63.c @@ -250,7 +250,7 @@ static void acp63_audio_remove(struct platform_device *pdev) static int acp63_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c index 6d5f5ade075c87..217b717e9beb75 100644 --- a/sound/soc/amd/acp/acp70.c +++ b/sound/soc/amd/acp/acp70.c @@ -182,7 +182,7 @@ static void acp_acp70_audio_remove(struct platform_device *pdev) static int acp70_pcm_resume(struct device *dev) { - struct acp_chip_info *chip = dev_get_platdata(dev); + struct acp_chip_info *chip = dev_get_drvdata(dev->parent); struct acp_stream *stream; struct snd_pcm_substream *substream; snd_pcm_uframes_t buf_in_frames; diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index 8e57f31ef7f7b8..7936b31736323b 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -193,6 +193,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) struct amd_sdw_manager *amd_manager; u32 ext_intr_stat, ext_intr_stat1; u16 irq_flag = 0; + u16 wake_irq_flag = 0; u16 sdw_dma_irq_flag = 0; adata = dev_id; @@ -231,7 +232,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) } if (adata->acp_rev >= ACP70_PCI_REV) - irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); + wake_irq_flag = check_and_handle_acp70_sdw_wake_irq(adata); if (ext_intr_stat & BIT(PDM_DMA_STAT)) { ps_pdm_data = dev_get_drvdata(&adata->pdm_dev->dev); @@ -245,7 +246,7 @@ static irqreturn_t acp63_irq_handler(int irq, void *dev_id) if (sdw_dma_irq_flag) return IRQ_WAKE_THREAD; - if (irq_flag) + 
if (irq_flag | wake_irq_flag) return IRQ_HANDLED; else return IRQ_NONE; diff --git a/sound/soc/apple/mca.c b/sound/soc/apple/mca.c index b4f4696809dd23..5dd24ab90d0f05 100644 --- a/sound/soc/apple/mca.c +++ b/sound/soc/apple/mca.c @@ -464,6 +464,28 @@ static int mca_configure_serdes(struct mca_cluster *cl, int serdes_unit, return -EINVAL; } +static int mca_fe_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct mca_cluster *cl = mca_dai_to_cluster(dai); + unsigned int mask, nchannels; + + if (cl->tdm_slots) { + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + mask = cl->tdm_tx_mask; + else + mask = cl->tdm_rx_mask; + + nchannels = hweight32(mask); + } else { + nchannels = 2; + } + + return snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + 1, nchannels); +} + static int mca_fe_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width) { @@ -680,6 +702,7 @@ static int mca_fe_hw_params(struct snd_pcm_substream *substream, } static const struct snd_soc_dai_ops mca_fe_ops = { + .startup = mca_fe_startup, .set_fmt = mca_fe_set_fmt, .set_bclk_ratio = mca_set_bclk_ratio, .set_tdm_slot = mca_fe_set_tdm_slot, diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 40bb7a1d44bcfa..20f99cbee29b3e 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -776,10 +776,9 @@ config SND_SOC_CS_AMP_LIB tristate config SND_SOC_CS_AMP_LIB_TEST - tristate "KUnit test for Cirrus Logic cs-amp-lib" - depends on KUNIT + tristate "KUnit test for Cirrus Logic cs-amp-lib" if !KUNIT_ALL_TESTS + depends on SND_SOC_CS_AMP_LIB && KUNIT default KUNIT_ALL_TESTS - select SND_SOC_CS_AMP_LIB help This builds KUnit tests for the Cirrus Logic common amplifier library. 
diff --git a/sound/soc/codecs/aw88081.c b/sound/soc/codecs/aw88081.c index ad16ab6812cd3f..3dd8428f08cce9 100644 --- a/sound/soc/codecs/aw88081.c +++ b/sound/soc/codecs/aw88081.c @@ -1295,9 +1295,19 @@ static int aw88081_i2c_probe(struct i2c_client *i2c) aw88081_dai, ARRAY_SIZE(aw88081_dai)); } +#if defined(CONFIG_OF) +static const struct of_device_id aw88081_of_match[] = { + { .compatible = "awinic,aw88081" }, + { .compatible = "awinic,aw88083" }, + { } +}; +MODULE_DEVICE_TABLE(of, aw88081_of_match); +#endif + static struct i2c_driver aw88081_i2c_driver = { .driver = { .name = AW88081_I2C_NAME, + .of_match_table = of_match_ptr(aw88081_of_match), }, .probe = aw88081_i2c_probe, .id_table = aw88081_i2c_id, diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index ac19a572fe70cb..6165ac16c3a950 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -654,6 +654,10 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) reinit_completion(&priv->type_detect); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK); + cs42l43_start_hs_bias(priv, true); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK, 0x3 << CS42L43_HSDET_MODE_SHIFT); @@ -665,6 +669,9 @@ static int cs42l43_run_type_detect(struct cs42l43_codec *priv) CS42L43_HSDET_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); cs42l43_stop_hs_bias(priv); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_VAL_MASK, 0); + if (!time_left) return -ETIMEDOUT; @@ -702,6 +709,9 @@ static void cs42l43_clear_jack(struct cs42l43_codec *priv) CS42L43_PGA_WIDESWING_MODE_EN_MASK, 0); regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CTRL, CS42L43_JACK_STEREO_CONFIG_MASK, 0); + regmap_update_bits(cs42l43->regmap, CS42L43_STEREO_MIC_CLAMP_CTRL, + CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK, + 
CS42L43_SMIC_HPAMP_CLAMP_DIS_FRC_MASK); regmap_update_bits(cs42l43->regmap, CS42L43_HS2, CS42L43_HSDET_MODE_MASK | CS42L43_HSDET_MANUAL_MODE_MASK, 0x2 << CS42L43_HSDET_MODE_SHIFT); diff --git a/sound/soc/codecs/hda.c b/sound/soc/codecs/hda.c index ddc00927313cfe..dc7794c9ac44ce 100644 --- a/sound/soc/codecs/hda.c +++ b/sound/soc/codecs/hda.c @@ -152,7 +152,7 @@ int hda_codec_probe_complete(struct hda_codec *codec) ret = snd_hda_codec_build_controls(codec); if (ret < 0) { dev_err(&hdev->dev, "unable to create controls %d\n", ret); - goto out; + return ret; } /* Bus suspended codecs as it does not manage their pm */ @@ -160,7 +160,7 @@ int hda_codec_probe_complete(struct hda_codec *codec) /* rpm was forbidden in snd_hda_codec_device_new() */ snd_hda_codec_set_power_save(codec, 2000); snd_hda_codec_register(codec); -out: + /* Complement pm_runtime_get_sync(bus) in probe */ pm_runtime_mark_last_busy(bus->dev); pm_runtime_put_autosuspend(bus->dev); diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index 17019b1d680b77..bc01ff65bd6f4d 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -842,12 +842,28 @@ static void print_eld_info(struct snd_info_entry *entry, static int hdmi_dai_proc_new(struct hdmi_codec_priv *hcp, struct snd_soc_dai *dai) { + struct snd_soc_component *component = dai->component; + struct snd_soc_card *card = component->card; + struct snd_soc_dai *d; + struct snd_soc_pcm_runtime *rtd; struct snd_info_entry *entry; char name[32]; - int err; + int err, i, id = 0; - snprintf(name, sizeof(name), "eld#%d", dai->id); - err = snd_card_proc_new(dai->component->card->snd_card, name, &entry); + /* + * To avoid duplicate proc entry, find its rtd and use rtd->id + * instead of dai->id + */ + for_each_card_rtds(card, rtd) { + for_each_rtd_dais(rtd, i, d) + if (d == dai) { + id = rtd->id; + goto found; + } + } +found: + snprintf(name, sizeof(name), "eld#%d", id); + err = snd_card_proc_new(card->snd_card, name, 
&entry); if (err < 0) return err; diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index b261fa373e657a..c1fb71cfb5d077 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -63,6 +63,10 @@ #define CDC_WSA_TX_SPKR_PROT_CLK_DISABLE 0 #define CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK GENMASK(3, 0) #define CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K 0 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_16K 1 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_24K 2 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_32K 3 +#define CDC_WSA_TX_SPKR_PROT_PCM_RATE_48K 4 #define CDC_WSA_TX0_SPKR_PROT_PATH_CFG0 (0x0248) #define CDC_WSA_TX1_SPKR_PROT_PATH_CTL (0x0264) #define CDC_WSA_TX1_SPKR_PROT_PATH_CFG0 (0x0268) @@ -407,6 +411,7 @@ struct wsa_macro { int ear_spkr_gain; int spkr_gain_offset; int spkr_mode; + u32 pcm_rate_vi; int is_softclip_on[WSA_MACRO_SOFTCLIP_MAX]; int softclip_clk_users[WSA_MACRO_SOFTCLIP_MAX]; struct regmap *regmap; @@ -1280,6 +1285,7 @@ static int wsa_macro_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_component *component = dai->component; + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); int ret; switch (substream->stream) { @@ -1291,6 +1297,11 @@ static int wsa_macro_hw_params(struct snd_pcm_substream *substream, __func__, params_rate(params)); return ret; } + break; + case SNDRV_PCM_STREAM_CAPTURE: + if (dai->id == WSA_MACRO_AIF_VI) + wsa->pcm_rate_vi = params_rate(params); + break; default: break; @@ -1448,35 +1459,11 @@ static void wsa_macro_mclk_enable(struct wsa_macro *wsa, bool mclk_enable) } } -static int wsa_macro_mclk_event(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, int event) +static void wsa_macro_enable_disable_vi_sense(struct snd_soc_component *component, bool enable, + u32 tx_reg0, u32 tx_reg1, u32 val) { - struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct wsa_macro *wsa = 
snd_soc_component_get_drvdata(component); - - wsa_macro_mclk_enable(wsa, event == SND_SOC_DAPM_PRE_PMU); - return 0; -} - -static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, - struct snd_kcontrol *kcontrol, - int event) -{ - struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); - u32 tx_reg0, tx_reg1; - - if (test_bit(WSA_MACRO_TX0, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) { - tx_reg0 = CDC_WSA_TX0_SPKR_PROT_PATH_CTL; - tx_reg1 = CDC_WSA_TX1_SPKR_PROT_PATH_CTL; - } else if (test_bit(WSA_MACRO_TX1, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) { - tx_reg0 = CDC_WSA_TX2_SPKR_PROT_PATH_CTL; - tx_reg1 = CDC_WSA_TX3_SPKR_PROT_PATH_CTL; - } - - switch (event) { - case SND_SOC_DAPM_POST_PMU: - /* Enable V&I sensing */ + if (enable) { + /* Enable V&I sensing */ snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_RESET_MASK, CDC_WSA_TX_SPKR_PROT_RESET); @@ -1485,10 +1472,10 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, CDC_WSA_TX_SPKR_PROT_RESET); snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K); + val); snd_soc_component_update_bits(component, tx_reg1, CDC_WSA_TX_SPKR_PROT_PCM_RATE_MASK, - CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K); + val); snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, CDC_WSA_TX_SPKR_PROT_CLK_ENABLE); @@ -1501,9 +1488,7 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, tx_reg1, CDC_WSA_TX_SPKR_PROT_RESET_MASK, CDC_WSA_TX_SPKR_PROT_NO_RESET); - break; - case SND_SOC_DAPM_POST_PMD: - /* Disable V&I sensing */ + } else { snd_soc_component_update_bits(component, tx_reg0, CDC_WSA_TX_SPKR_PROT_RESET_MASK, CDC_WSA_TX_SPKR_PROT_RESET); @@ -1516,6 +1501,72 @@ static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, 
snd_soc_component_update_bits(component, tx_reg1, CDC_WSA_TX_SPKR_PROT_CLK_EN_MASK, CDC_WSA_TX_SPKR_PROT_CLK_DISABLE); + } +} + +static void wsa_macro_enable_disable_vi_feedback(struct snd_soc_component *component, + bool enable, u32 rate) +{ + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); + + if (test_bit(WSA_MACRO_TX0, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) + wsa_macro_enable_disable_vi_sense(component, enable, + CDC_WSA_TX0_SPKR_PROT_PATH_CTL, + CDC_WSA_TX1_SPKR_PROT_PATH_CTL, rate); + + if (test_bit(WSA_MACRO_TX1, &wsa->active_ch_mask[WSA_MACRO_AIF_VI])) + wsa_macro_enable_disable_vi_sense(component, enable, + CDC_WSA_TX2_SPKR_PROT_PATH_CTL, + CDC_WSA_TX3_SPKR_PROT_PATH_CTL, rate); +} + +static int wsa_macro_mclk_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); + + wsa_macro_mclk_enable(wsa, event == SND_SOC_DAPM_PRE_PMU); + return 0; +} + +static int wsa_macro_enable_vi_feedback(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, + int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); + u32 rate_val; + + switch (wsa->pcm_rate_vi) { + case 8000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K; + break; + case 16000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_16K; + break; + case 24000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_24K; + break; + case 32000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_32K; + break; + case 48000: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_48K; + break; + default: + rate_val = CDC_WSA_TX_SPKR_PROT_PCM_RATE_8K; + break; + } + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + /* Enable V&I sensing */ + wsa_macro_enable_disable_vi_feedback(component, true, rate_val); + break; + case SND_SOC_DAPM_POST_PMD: + /* Disable V&I 
sensing */ + wsa_macro_enable_disable_vi_feedback(component, false, rate_val); break; } diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 08aa7ee3425689..fbfe4d032df7b2 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -546,6 +546,8 @@ static uint8_t sn012776_bop_presets[] = { 0x06, 0x3e, 0x37, 0x30, 0xff, 0xe6 }; +static const struct regmap_config tas2764_i2c_regmap; + static int tas2764_codec_probe(struct snd_soc_component *component) { struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); @@ -559,9 +561,10 @@ static int tas2764_codec_probe(struct snd_soc_component *component) } tas2764_reset(tas2764); + regmap_reinit_cache(tas2764->regmap, &tas2764_i2c_regmap); if (tas2764->irq) { - ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0xff); + ret = snd_soc_component_write(tas2764->component, TAS2764_INT_MASK0, 0x00); if (ret < 0) return ret; diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 4c4171bb3fbbe4..28001e9857d9dc 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -199,12 +199,10 @@ static void i2s_start(struct dw_i2s_dev *dev, else i2s_write_reg(dev->i2s_base, IRER, 1); - /* I2S needs to enable IRQ to make a handshake with DMAC on the JH7110 SoC */ - if (dev->use_pio || dev->is_jh7110) - i2s_enable_irqs(dev, substream->stream, config->chan_nr); - else + if (!(dev->use_pio || dev->is_jh7110)) i2s_enable_dma(dev, substream->stream); + i2s_enable_irqs(dev, substream->stream, config->chan_nr); i2s_write_reg(dev->i2s_base, CER, 1); } @@ -218,11 +216,12 @@ static void i2s_stop(struct dw_i2s_dev *dev, else i2s_write_reg(dev->i2s_base, IRER, 0); - if (dev->use_pio || dev->is_jh7110) - i2s_disable_irqs(dev, substream->stream, 8); - else + if (!(dev->use_pio || dev->is_jh7110)) i2s_disable_dma(dev, substream->stream); + i2s_disable_irqs(dev, substream->stream, 8); + + if (!dev->active) { i2s_write_reg(dev->i2s_base, CER, 0); 
i2s_write_reg(dev->i2s_base, IER, 0); diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index f501f47242fb0b..1bba48318e2ddf 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -156,11 +156,24 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, for_each_dpcm_be(rtd, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; struct snd_pcm_substream *substream_be; - struct snd_soc_dai *dai = snd_soc_rtd_to_cpu(be, 0); + struct snd_soc_dai *dai_cpu = snd_soc_rtd_to_cpu(be, 0); + struct snd_soc_dai *dai_codec = snd_soc_rtd_to_codec(be, 0); + struct snd_soc_dai *dai; if (dpcm->fe != rtd) continue; + /* + * With audio graph card, original cpu dai is changed to codec + * device in backend, so if cpu dai is dummy device in backend, + * get the codec dai device, which is the real hardware device + * connected. + */ + if (!snd_soc_dai_is_dummy(dai_cpu)) + dai = dai_cpu; + else + dai = dai_codec; + substream_be = snd_soc_dpcm_get_substream(be, stream); dma_params_be = snd_soc_dai_get_dma_data(dai, substream_be); dev_be = dai->dev; diff --git a/sound/soc/fsl/fsl_qmc_audio.c b/sound/soc/fsl/fsl_qmc_audio.c index b2979290c97324..5614a8b909edf8 100644 --- a/sound/soc/fsl/fsl_qmc_audio.c +++ b/sound/soc/fsl/fsl_qmc_audio.c @@ -250,6 +250,9 @@ static int qmc_audio_pcm_trigger(struct snd_soc_component *component, switch (cmd) { case SNDRV_PCM_TRIGGER_START: bitmap_zero(prtd->chans_pending, 64); + prtd->buffer_ended = 0; + prtd->ch_dma_addr_current = prtd->ch_dma_addr_start; + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { for (i = 0; i < prtd->channels; i++) prtd->qmc_dai->chans[i].prtd_tx = prtd; diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 3686d468506b35..45e000f61eccac 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -544,7 +544,7 @@ static int imx_card_parse_of(struct imx_card_data *data) if (!card->dai_link) return -ENOMEM; - data->link_data = 
devm_kcalloc(dev, num_links, sizeof(*link), GFP_KERNEL); + data->link_data = devm_kcalloc(dev, num_links, sizeof(*link_data), GFP_KERNEL); if (!data->link_data) return -ENOMEM; diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index a1ccc300e68ca8..3ae2a212a2e38a 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1174,9 +1174,9 @@ void graph_util_parse_link_direction(struct device_node *np, bool is_playback_only = of_property_read_bool(np, "playback-only"); bool is_capture_only = of_property_read_bool(np, "capture-only"); - if (is_playback_only) + if (playback_only) *playback_only = is_playback_only; - if (is_capture_only) + if (capture_only) *capture_only = is_capture_only; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h index 585543f872fccc..ec5502f9d5cb1d 100644 --- a/sound/soc/intel/avs/avs.h +++ b/sound/soc/intel/avs/avs.h @@ -72,6 +72,8 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops; #define AVS_PLATATTR_CLDMA BIT_ULL(0) #define AVS_PLATATTR_IMR BIT_ULL(1) +#define AVS_PLATATTR_ACE BIT_ULL(2) +#define AVS_PLATATTR_ALTHDA BIT_ULL(3) #define avs_platattr_test(adev, attr) \ ((adev)->spec->attributes & AVS_PLATATTR_##attr) @@ -79,7 +81,6 @@ extern const struct avs_dsp_ops avs_tgl_dsp_ops; struct avs_sram_spec { const u32 base_offset; const u32 window_size; - const u32 rom_status_offset; }; struct avs_hipc_spec { @@ -91,6 +92,7 @@ struct avs_hipc_spec { const u32 rsp_offset; const u32 rsp_busy_mask; const u32 ctl_offset; + const u32 sts_offset; }; /* Platform specific descriptor */ diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 8fbf33e30dfc3e..cbbc656fcc3f86 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -54,14 +54,17 @@ void avs_hda_power_gating_enable(struct avs_dev *adev, bool enable) { u32 value = enable ? 
0 : pgctl_mask; - avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value); + if (!avs_platattr_test(adev, ACE)) + avs_hda_update_config_dword(&adev->base.core, AZX_PCIREG_PGCTL, pgctl_mask, value); } static void avs_hdac_clock_gating_enable(struct hdac_bus *bus, bool enable) { + struct avs_dev *adev = hdac_to_avs(bus); u32 value = enable ? cgctl_mask : 0; - avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value); + if (!avs_platattr_test(adev, ACE)) + avs_hda_update_config_dword(bus, AZX_PCIREG_CGCTL, cgctl_mask, value); } void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable) @@ -71,6 +74,8 @@ void avs_hda_clock_gating_enable(struct avs_dev *adev, bool enable) void avs_hda_l1sen_enable(struct avs_dev *adev, bool enable) { + if (avs_platattr_test(adev, ACE)) + return; if (enable) { if (atomic_inc_and_test(&adev->l1sen_counter)) snd_hdac_chip_updatel(&adev->base.core, VS_EM2, AZX_VS_EM2_L1SEN, @@ -99,6 +104,7 @@ static int avs_hdac_bus_init_streams(struct hdac_bus *bus) static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) { + struct avs_dev *adev = hdac_to_avs(bus); struct hdac_ext_link *hlink; bool ret; @@ -114,7 +120,8 @@ static bool avs_hdac_bus_init_chip(struct hdac_bus *bus, bool full_reset) /* Set DUM bit to address incorrect position reporting for capture * streams. 
In order to do so, CTRL needs to be out of reset state */ - snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM); + if (!avs_platattr_test(adev, ACE)) + snd_hdac_chip_updatel(bus, VS_EM2, AZX_VS_EM2_DUM, AZX_VS_EM2_DUM); return ret; } @@ -748,13 +755,11 @@ static const struct dev_pm_ops avs_dev_pm = { static const struct avs_sram_spec skl_sram_spec = { .base_offset = SKL_ADSP_SRAM_BASE_OFFSET, .window_size = SKL_ADSP_SRAM_WINDOW_SIZE, - .rom_status_offset = SKL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_sram_spec apl_sram_spec = { .base_offset = APL_ADSP_SRAM_BASE_OFFSET, .window_size = APL_ADSP_SRAM_WINDOW_SIZE, - .rom_status_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_hipc_spec skl_hipc_spec = { @@ -766,6 +771,19 @@ static const struct avs_hipc_spec skl_hipc_spec = { .rsp_offset = SKL_ADSP_REG_HIPCT, .rsp_busy_mask = SKL_ADSP_HIPCT_BUSY, .ctl_offset = SKL_ADSP_REG_HIPCCTL, + .sts_offset = SKL_ADSP_SRAM_BASE_OFFSET, +}; + +static const struct avs_hipc_spec apl_hipc_spec = { + .req_offset = SKL_ADSP_REG_HIPCI, + .req_ext_offset = SKL_ADSP_REG_HIPCIE, + .req_busy_mask = SKL_ADSP_HIPCI_BUSY, + .ack_offset = SKL_ADSP_REG_HIPCIE, + .ack_done_mask = SKL_ADSP_HIPCIE_DONE, + .rsp_offset = SKL_ADSP_REG_HIPCT, + .rsp_busy_mask = SKL_ADSP_HIPCT_BUSY, + .ctl_offset = SKL_ADSP_REG_HIPCCTL, + .sts_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_hipc_spec cnl_hipc_spec = { @@ -777,6 +795,7 @@ static const struct avs_hipc_spec cnl_hipc_spec = { .rsp_offset = CNL_ADSP_REG_HIPCTDR, .rsp_busy_mask = CNL_ADSP_HIPCTDR_BUSY, .ctl_offset = CNL_ADSP_REG_HIPCCTL, + .sts_offset = APL_ADSP_SRAM_BASE_OFFSET, }; static const struct avs_spec skl_desc = { @@ -796,7 +815,7 @@ static const struct avs_spec apl_desc = { .core_init_mask = 3, .attributes = AVS_PLATATTR_IMR, .sram = &apl_sram_spec, - .hipc = &skl_hipc_spec, + .hipc = &apl_hipc_spec, }; static const struct avs_spec cnl_desc = { @@ -902,13 +921,13 @@ MODULE_AUTHOR("Cezary Rojewski 
"); MODULE_AUTHOR("Amadeusz Slawinski "); MODULE_DESCRIPTION("Intel cAVS sound driver"); MODULE_LICENSE("GPL"); -MODULE_FIRMWARE("intel/skl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/apl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/cnl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/icl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/jsl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/lkf/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/tgl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/ehl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/adl/dsp_basefw.bin"); -MODULE_FIRMWARE("intel/adl_n/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/skl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/apl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/cnl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/icl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/jsl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/lkf/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/tgl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/ehl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/adl/dsp_basefw.bin"); +MODULE_FIRMWARE("intel/avs/adl_n/dsp_basefw.bin"); diff --git a/sound/soc/intel/avs/debugfs.c b/sound/soc/intel/avs/debugfs.c index 8c4edda97f757f..0e826ca20619ca 100644 --- a/sound/soc/intel/avs/debugfs.c +++ b/sound/soc/intel/avs/debugfs.c @@ -373,7 +373,10 @@ static ssize_t trace_control_write(struct file *file, const char __user *from, s return ret; num_elems = *array; - resource_mask = array[1]; + if (!num_elems) { + ret = -EINVAL; + goto free_array; + } /* * Disable if just resource mask is provided - no log priority flags. @@ -381,6 +384,7 @@ static ssize_t trace_control_write(struct file *file, const char __user *from, s * Enable input format: mask, prio1, .., prioN * Where 'N' equals number of bits set in the 'mask'. 
*/ + resource_mask = array[1]; if (num_elems == 1) { ret = disable_logs(adev, resource_mask); } else { diff --git a/sound/soc/intel/avs/ipc.c b/sound/soc/intel/avs/ipc.c index 08ed9d96738a05..0314f9d4ea5f40 100644 --- a/sound/soc/intel/avs/ipc.c +++ b/sound/soc/intel/avs/ipc.c @@ -169,7 +169,9 @@ static void avs_dsp_exception_caught(struct avs_dev *adev, union avs_notify_msg dev_crit(adev->dev, "communication severed, rebooting dsp..\n"); - cancel_delayed_work_sync(&ipc->d0ix_work); + /* Avoid deadlock as the exception may be the response to SET_D0IX. */ + if (current_work() != &ipc->d0ix_work.work) + cancel_delayed_work_sync(&ipc->d0ix_work); ipc->in_d0ix = false; /* Re-enabled on recovery completion. */ pm_runtime_disable(adev->dev); diff --git a/sound/soc/intel/avs/loader.c b/sound/soc/intel/avs/loader.c index 0b29941feb0ef0..138e4e9de5e309 100644 --- a/sound/soc/intel/avs/loader.c +++ b/sound/soc/intel/avs/loader.c @@ -310,7 +310,7 @@ avs_hda_init_rom(struct avs_dev *adev, unsigned int dma_id, bool purge) } /* await ROM init */ - ret = snd_hdac_adsp_readl_poll(adev, spec->sram->rom_status_offset, reg, + ret = snd_hdac_adsp_readl_poll(adev, spec->hipc->sts_offset, reg, (reg & 0xF) == AVS_ROM_INIT_DONE || (reg & 0xF) == APL_ROM_FW_ENTERED, AVS_ROM_INIT_POLLING_US, APL_ROM_INIT_TIMEOUT_US); @@ -683,6 +683,7 @@ int avs_dsp_boot_firmware(struct avs_dev *adev, bool purge) static int avs_dsp_alloc_resources(struct avs_dev *adev) { + struct hdac_ext_link *link; int ret, i; ret = avs_ipc_get_hw_config(adev, &adev->hw_cfg); @@ -693,6 +694,14 @@ static int avs_dsp_alloc_resources(struct avs_dev *adev) if (ret) return AVS_IPC_RET(ret); + /* If hw allows, read capabilities directly from it. 
*/ + if (avs_platattr_test(adev, ALTHDA)) { + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, + AZX_REG_ML_LEPTR_ID_INTEL_SSP); + if (link) + adev->hw_cfg.i2s_caps.ctrl_count = link->slcount; + } + adev->core_refs = devm_kcalloc(adev->dev, adev->hw_cfg.dsp_cores, sizeof(*adev->core_refs), GFP_KERNEL); adev->lib_names = devm_kcalloc(adev->dev, adev->fw_cfg.max_libs_count, diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index ef0c1d125d66b8..43b3d995391072 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -115,6 +115,80 @@ avs_path_find_variant(struct avs_dev *adev, return NULL; } +static struct acpi_nhlt_config * +avs_nhlt_config_or_default(struct avs_dev *adev, struct avs_tplg_module *t); + +int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template *template, + struct snd_pcm_hw_constraint_list *rate_list, + struct snd_pcm_hw_constraint_list *channels_list, + struct snd_pcm_hw_constraint_list *sample_bits_list) +{ + struct avs_tplg_path *path_template; + unsigned int *rlist, *clist, *slist; + size_t i; + + i = 0; + list_for_each_entry(path_template, &template->path_list, node) + i++; + + rlist = kcalloc(i, sizeof(*rlist), GFP_KERNEL); + clist = kcalloc(i, sizeof(*clist), GFP_KERNEL); + slist = kcalloc(i, sizeof(*slist), GFP_KERNEL); + if (!rlist || !clist || !slist) + return -ENOMEM; + + i = 0; + list_for_each_entry(path_template, &template->path_list, node) { + struct avs_tplg_pipeline *pipeline_template; + + list_for_each_entry(pipeline_template, &path_template->ppl_list, node) { + struct avs_tplg_module *module_template; + + list_for_each_entry(module_template, &pipeline_template->mod_list, node) { + const guid_t *type = &module_template->cfg_ext->type; + struct acpi_nhlt_config *blob; + + if (!guid_equal(type, &AVS_COPIER_MOD_UUID) && + !guid_equal(type, &AVS_WOVHOSTM_MOD_UUID)) + continue; + + switch (module_template->cfg_ext->copier.dma_type) { + case AVS_DMA_DMIC_LINK_INPUT: + 
case AVS_DMA_I2S_LINK_OUTPUT: + case AVS_DMA_I2S_LINK_INPUT: + break; + default: + continue; + } + + blob = avs_nhlt_config_or_default(adev, module_template); + if (IS_ERR(blob)) + continue; + + rlist[i] = path_template->fe_fmt->sampling_freq; + clist[i] = path_template->fe_fmt->num_channels; + slist[i] = path_template->fe_fmt->bit_depth; + i++; + } + } + } + + if (i) { + rate_list->count = i; + rate_list->list = rlist; + channels_list->count = i; + channels_list->list = clist; + sample_bits_list->count = i; + sample_bits_list->list = slist; + } else { + kfree(rlist); + kfree(clist); + kfree(slist); + } + + return i; +} + static void avs_init_node_id(union avs_connector_node_id *node_id, struct avs_tplg_modcfg_ext *te, u32 dma_id) { diff --git a/sound/soc/intel/avs/path.h b/sound/soc/intel/avs/path.h index 7ed7e94e0a566b..c65ed84aa85305 100644 --- a/sound/soc/intel/avs/path.h +++ b/sound/soc/intel/avs/path.h @@ -69,6 +69,11 @@ int avs_path_reset(struct avs_path *path); int avs_path_pause(struct avs_path *path); int avs_path_run(struct avs_path *path, int trigger); +int avs_path_set_constraint(struct avs_dev *adev, struct avs_tplg_path_template *template, + struct snd_pcm_hw_constraint_list *rate_list, + struct snd_pcm_hw_constraint_list *channels_list, + struct snd_pcm_hw_constraint_list *sample_bits_list); + int avs_peakvol_set_volume(struct avs_dev *adev, struct avs_path_module *mod, struct soc_mixer_control *mc, long *input); int avs_peakvol_set_mute(struct avs_dev *adev, struct avs_path_module *mod, diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index dac463390da135..5a2330e4e4225d 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -31,7 +31,12 @@ struct avs_dma_data { struct hdac_ext_stream *host_stream; }; + struct snd_pcm_hw_constraint_list rate_list; + struct snd_pcm_hw_constraint_list channels_list; + struct snd_pcm_hw_constraint_list sample_bits_list; + struct work_struct period_elapsed_work; + struct 
hdac_ext_link *link; struct snd_pcm_substream *substream; }; @@ -74,6 +79,42 @@ void avs_period_elapsed(struct snd_pcm_substream *substream) schedule_work(&data->period_elapsed_work); } +static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule); +static int avs_hw_constraints_init(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_hw_constraint_list *r, *c, *s; + struct avs_dma_data *data; + int ret; + + ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + + data = snd_soc_dai_get_dma_data(dai, substream); + r = &(data->rate_list); + c = &(data->channels_list); + s = &(data->sample_bits_list); + + ret = avs_path_set_constraint(data->adev, data->template, r, c, s); + if (ret <= 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, r); + if (ret < 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, c); + if (ret < 0) + return ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, s); + if (ret < 0) + return ret; + + return 0; +} + static int avs_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); @@ -101,7 +142,7 @@ static int avs_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_d if (rtd->dai_link->ignore_suspend) adev->num_lp_paths++; - return 0; + return avs_hw_constraints_init(substream, dai); } static void avs_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) @@ -114,6 +155,10 @@ static void avs_dai_shutdown(struct snd_pcm_substream *substream, struct snd_soc if (rtd->dai_link->ignore_suspend) data->adev->num_lp_paths--; + kfree(data->rate_list.list); + kfree(data->channels_list.list); + kfree(data->sample_bits_list.list); + 
snd_soc_dai_set_dma_data(dai, substream, NULL); kfree(data); } @@ -278,32 +323,75 @@ static const struct snd_soc_dai_ops avs_dai_nonhda_be_ops = { .trigger = avs_dai_nonhda_be_trigger, }; -static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +static int __avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai, + struct hdac_ext_link *link) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct hdac_ext_stream *link_stream; struct avs_dma_data *data; - struct hda_codec *codec; int ret; ret = avs_dai_startup(substream, dai); if (ret) return ret; - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - link_stream = snd_hdac_ext_stream_assign(&codec->bus->core, substream, + data = snd_soc_dai_get_dma_data(dai, substream); + link_stream = snd_hdac_ext_stream_assign(&data->adev->base.core, substream, HDAC_EXT_STREAM_TYPE_LINK); if (!link_stream) { avs_dai_shutdown(substream, dai); return -EBUSY; } - data = snd_soc_dai_get_dma_data(dai, substream); data->link_stream = link_stream; - substream->runtime->private_data = link_stream; + data->link = link; return 0; } +static int avs_dai_hda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct hdac_ext_link *link; + struct avs_dma_data *data; + struct hda_codec *codec; + int ret; + + codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); + + link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr); + if (!link) + return -EINVAL; + + ret = __avs_dai_hda_be_startup(substream, dai, link); + if (!ret) { + data = snd_soc_dai_get_dma_data(dai, substream); + substream->runtime->private_data = data->link_stream; + } + + return ret; +} + +static int avs_dai_i2shda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dev *adev = to_avs_dev(dai->component->dev); + 
struct hdac_ext_link *link; + + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_SSP); + if (!link) + return -EINVAL; + return __avs_dai_hda_be_startup(substream, dai, link); +} + +static int avs_dai_dmichda_be_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dev *adev = to_avs_dev(dai->component->dev); + struct hdac_ext_link *link; + + link = snd_hdac_ext_bus_get_hlink_by_id(&adev->base.core, AZX_REG_ML_LEPTR_ID_INTEL_DMIC); + if (!link) + return -EINVAL; + return __avs_dai_hda_be_startup(substream, dai, link); +} + static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream); @@ -313,6 +401,14 @@ static void avs_dai_hda_be_shutdown(struct snd_pcm_substream *substream, struct avs_dai_shutdown(substream, dai); } +static void avs_dai_althda_be_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct avs_dma_data *data = snd_soc_dai_get_dma_data(dai, substream); + + snd_hdac_ext_stream_release(data->link_stream, HDAC_EXT_STREAM_TYPE_LINK); + avs_dai_shutdown(substream, dai); +} + static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params, struct snd_soc_dai *dai) { @@ -328,13 +424,8 @@ static int avs_dai_hda_be_hw_params(struct snd_pcm_substream *substream, static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct avs_dma_data *data; - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct hdac_ext_stream *link_stream; - struct hdac_ext_link *link; - struct hda_codec *codec; - - dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name); + struct avs_dma_data *data; data = snd_soc_dai_get_dma_data(dai, substream); if (!data->path) @@ -346,54 +437,43 @@ static int avs_dai_hda_be_hw_free(struct snd_pcm_substream *substream, struct sn data->path = NULL; /* 
clear link <-> stream mapping */ - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - link = snd_hdac_ext_bus_get_hlink_by_addr(&codec->bus->core, codec->core.addr); - if (!link) - return -EINVAL; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_hdac_ext_bus_link_clear_stream_id(link, hdac_stream(link_stream)->stream_tag); + snd_hdac_ext_bus_link_clear_stream_id(data->link, + hdac_stream(link_stream)->stream_tag); return 0; } static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); - struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_soc_pcm_runtime *be = snd_soc_substream_to_rtd(substream); const struct snd_soc_pcm_stream *stream_info; struct hdac_ext_stream *link_stream; - struct hdac_ext_link *link; + const struct snd_pcm_hw_params *p; struct avs_dma_data *data; - struct hda_codec *codec; - struct hdac_bus *bus; unsigned int format_val; unsigned int bits; int ret; data = snd_soc_dai_get_dma_data(dai, substream); link_stream = data->link_stream; + p = &be->dpcm[substream->stream].hw_params; if (link_stream->link_prepared) return 0; - codec = dev_to_hda_codec(snd_soc_rtd_to_codec(rtd, 0)->dev); - bus = &codec->bus->core; stream_info = snd_soc_dai_get_pcm_stream(dai, substream->stream); - bits = snd_hdac_stream_format_bits(runtime->format, runtime->subformat, + bits = snd_hdac_stream_format_bits(params_format(p), params_subformat(p), stream_info->sig_bits); - format_val = snd_hdac_stream_format(runtime->channels, bits, runtime->rate); + format_val = snd_hdac_stream_format(params_channels(p), bits, params_rate(p)); - snd_hdac_ext_stream_decouple(bus, link_stream, true); + snd_hdac_ext_stream_decouple(&data->adev->base.core, link_stream, true); snd_hdac_ext_stream_reset(link_stream); snd_hdac_ext_stream_setup(link_stream, format_val); - link = snd_hdac_ext_bus_get_hlink_by_addr(bus, codec->core.addr); - if (!link) - 
return -EINVAL; - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - snd_hdac_ext_bus_link_set_stream_id(link, hdac_stream(link_stream)->stream_tag); + snd_hdac_ext_bus_link_set_stream_id(data->link, + hdac_stream(link_stream)->stream_tag); ret = avs_dai_prepare(substream, dai); if (ret) @@ -468,6 +548,26 @@ static const struct snd_soc_dai_ops avs_dai_hda_be_ops = { .trigger = avs_dai_hda_be_trigger, }; +__maybe_unused +static const struct snd_soc_dai_ops avs_dai_i2shda_be_ops = { + .startup = avs_dai_i2shda_be_startup, + .shutdown = avs_dai_althda_be_shutdown, + .hw_params = avs_dai_hda_be_hw_params, + .hw_free = avs_dai_hda_be_hw_free, + .prepare = avs_dai_hda_be_prepare, + .trigger = avs_dai_hda_be_trigger, +}; + +__maybe_unused +static const struct snd_soc_dai_ops avs_dai_dmichda_be_ops = { + .startup = avs_dai_dmichda_be_startup, + .shutdown = avs_dai_althda_be_shutdown, + .hw_params = avs_dai_hda_be_hw_params, + .hw_free = avs_dai_hda_be_hw_free, + .prepare = avs_dai_hda_be_prepare, + .trigger = avs_dai_hda_be_trigger, +}; + static int hw_rule_param_size(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct snd_interval *interval = hw_param_interval(params, rule->var); @@ -927,7 +1027,8 @@ static int avs_component_probe(struct snd_soc_component *component) else mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL, "hda-generic-tplg.bin"); - + if (!mach->tplg_filename) + return -ENOMEM; filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix, mach->tplg_filename); if (!filename) diff --git a/sound/soc/intel/avs/registers.h b/sound/soc/intel/avs/registers.h index 368ede05f2cdaa..4db0cdf68ffc7a 100644 --- a/sound/soc/intel/avs/registers.h +++ b/sound/soc/intel/avs/registers.h @@ -74,7 +74,7 @@ #define APL_ADSP_SRAM_WINDOW_SIZE 0x20000 /* Constants used when accessing SRAM, space shared with firmware */ -#define AVS_FW_REG_BASE(adev) ((adev)->spec->sram->base_offset) +#define AVS_FW_REG_BASE(adev) 
((adev)->spec->hipc->sts_offset) #define AVS_FW_REG_STATUS(adev) (AVS_FW_REG_BASE(adev) + 0x0) #define AVS_FW_REG_ERROR(adev) (AVS_FW_REG_BASE(adev) + 0x4) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 6446cda0f85727..0f3b8f44e70112 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -576,6 +576,19 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, + { /* Acer Aspire SW3-013 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW3-013"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 90dafa810b2ec0..095d08b3fc8249 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -764,6 +764,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), + SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC), {} }; diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 5993819cc58a26..008a20a2acbda7 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -156,7 +156,7 @@ static void catpt_dsp_set_srampge(struct catpt_dev *cdev, struct resource *sram, { unsigned long old; u32 off = sram->start; - u32 b = __ffs(mask); + unsigned long b = __ffs(mask); old = catpt_readl_pci(cdev, VDRTCTL0) & mask; dev_dbg(cdev->dev, "SRAMPGE [0x%08lx] 0x%08lx -> 0x%08lx", diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c 
b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 6603d8de501ca5..c599eb43eeb110 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -431,7 +431,8 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { .mask = BIT(3), .num_adr = ARRAY_SIZE(cs42l43_3_adr), .adr_d = cs42l43_3_adr, - } + }, + {} }; static const struct snd_soc_acpi_link_adr ptl_rt722_only[] = { diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 3033e2d3fe1684..90e367586493d7 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -228,6 +228,7 @@ config SND_SOC_MT8188 config SND_SOC_MT8188_MT6359 tristate "ASoC Audio driver for MT8188 with MT6359 and I2S codecs" depends on SND_SOC_MT8188 && MTK_PMIC_WRAP + depends on SND_SOC_MT6359_ACCDET || !SND_SOC_MT6359_ACCDET depends on I2C select SND_SOC_MT6359 select SND_SOC_HDMI_CODEC diff --git a/sound/soc/mediatek/mt8195/mt8195-mt6359.c b/sound/soc/mediatek/mt8195/mt8195-mt6359.c index df29a9fa5aee5b..1fa664b56f30fa 100644 --- a/sound/soc/mediatek/mt8195/mt8195-mt6359.c +++ b/sound/soc/mediatek/mt8195/mt8195-mt6359.c @@ -822,12 +822,12 @@ SND_SOC_DAILINK_DEFS(ETDM1_IN_BE, SND_SOC_DAILINK_DEFS(ETDM2_IN_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM2_IN")), - DAILINK_COMP_ARRAY(COMP_EMPTY()), + DAILINK_COMP_ARRAY(COMP_DUMMY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM1_OUT_BE, DAILINK_COMP_ARRAY(COMP_CPU("ETDM1_OUT")), - DAILINK_COMP_ARRAY(COMP_EMPTY()), + DAILINK_COMP_ARRAY(COMP_DUMMY()), DAILINK_COMP_ARRAY(COMP_EMPTY())); SND_SOC_DAILINK_DEFS(ETDM2_OUT_BE, diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index 27a2bf9a661393..de3ec6f594c11c 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -13,10 +13,11 @@ #include #include #include +#include #include "lpass-hdmi.h" #define LPASS_AHBIX_CLOCK_FREQUENCY 131072000 -#define LPASS_MAX_PORTS (LPASS_CDC_DMA_VA_TX8 + 1) +#define LPASS_MAX_PORTS 
(DISPLAY_PORT_RX_7 + 1) #define LPASS_MAX_MI2S_PORTS (8) #define LPASS_MAX_DMA_CHANNELS (8) #define LPASS_MAX_HDMI_DMA_CHANNELS (4) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index 3a0af4ca7ab6c7..0f7458a4390198 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -1244,7 +1244,7 @@ static int rz_ssi_runtime_resume(struct device *dev) static const struct dev_pm_ops rz_ssi_pm_ops = { RUNTIME_PM_OPS(rz_ssi_runtime_suspend, rz_ssi_runtime_resume, NULL) - SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) }; static struct platform_driver rz_ssi_driver = { diff --git a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c index 246e5c2e0af55f..c7e55f4433514c 100644 --- a/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c +++ b/sound/soc/sdw_utils/soc_sdw_bridge_cs35l56.c @@ -60,6 +60,10 @@ static int asoc_sdw_bridge_cs35l56_asp_init(struct snd_soc_pcm_runtime *rtd) /* 4 x 16-bit sample slots and FSYNC=48000, BCLK=3.072 MHz */ for_each_rtd_codec_dais(rtd, i, codec_dai) { + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dai_set_tdm_slot(codec_dai, tx_mask, rx_mask, 4, 16); if (ret < 0) return ret; diff --git a/sound/soc/sdw_utils/soc_sdw_cs42l43.c b/sound/soc/sdw_utils/soc_sdw_cs42l43.c index 668c9d28a1c12d..b415d45d520d0c 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs42l43.c +++ b/sound/soc/sdw_utils/soc_sdw_cs42l43.c @@ -20,6 +20,8 @@ #include #include +#define CS42L43_SPK_VOLUME_0DB 128 /* 0dB Max */ + static const struct snd_soc_dapm_route cs42l43_hs_map[] = { { "Headphone", NULL, "cs42l43 AMP3_OUT" }, { "Headphone", NULL, "cs42l43 AMP4_OUT" }, @@ -117,6 +119,14 @@ int asoc_sdw_cs42l43_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_so return -ENOMEM; } + ret = snd_soc_limit_volume(card, "cs42l43 Speaker Digital 
Volume", + CS42L43_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "cs42l43 speaker volume limit failed: %d\n", ret); + else + dev_info(card->dev, "Setting CS42L43 Speaker volume limit to %d\n", + CS42L43_SPK_VOLUME_0DB); + ret = snd_soc_dapm_add_routes(&card->dapm, cs42l43_spk_map, ARRAY_SIZE(cs42l43_spk_map)); if (ret) diff --git a/sound/soc/sdw_utils/soc_sdw_cs_amp.c b/sound/soc/sdw_utils/soc_sdw_cs_amp.c index 4b6181cf29716f..35b550bcd4ded5 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs_amp.c +++ b/sound/soc/sdw_utils/soc_sdw_cs_amp.c @@ -16,6 +16,25 @@ #define CODEC_NAME_SIZE 8 #define CS_AMP_CHANNELS_PER_AMP 4 +#define CS35L56_SPK_VOLUME_0DB 400 /* 0dB Max */ + +int asoc_sdw_cs35l56_volume_limit(struct snd_soc_card *card, const char *name_prefix) +{ + char *volume_ctl_name; + int ret; + + volume_ctl_name = kasprintf(GFP_KERNEL, "%s Speaker Volume", name_prefix); + if (!volume_ctl_name) + return -ENOMEM; + + ret = snd_soc_limit_volume(card, volume_ctl_name, CS35L56_SPK_VOLUME_0DB); + if (ret) + dev_err(card->dev, "%s limit set failed: %d\n", volume_ctl_name, ret); + + kfree(volume_ctl_name); + return ret; +} +EXPORT_SYMBOL_NS(asoc_sdw_cs35l56_volume_limit, "SND_SOC_SDW_UTILS"); int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { @@ -40,6 +59,11 @@ int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai snprintf(widget_name, sizeof(widget_name), "%s SPK", codec_dai->component->name_prefix); + + ret = asoc_sdw_cs35l56_volume_limit(card, codec_dai->component->name_prefix); + if (ret) + return ret; + ret = snd_soc_dapm_add_routes(&card->dapm, &route, 1); if (ret) return ret; diff --git a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c index 46d917a99c51da..97be110a59b63a 100644 --- a/sound/soc/sdw_utils/soc_sdw_rt_dmic.c +++ b/sound/soc/sdw_utils/soc_sdw_rt_dmic.c @@ -29,6 +29,8 @@ int asoc_sdw_rt_dmic_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_da mic_name = 
devm_kasprintf(card->dev, GFP_KERNEL, "rt715-sdca"); else mic_name = devm_kasprintf(card->dev, GFP_KERNEL, "%s", component->name_prefix); + if (!mic_name) + return -ENOMEM; card->components = devm_kasprintf(card->dev, GFP_KERNEL, "%s mic:%s", card->components, diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 4308a6cbb2e615..43835197d1feea 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1584,10 +1584,13 @@ int dpcm_add_paths(struct snd_soc_pcm_runtime *fe, int stream, /* * Filter for systems with 'component_chaining' enabled. * This helps to avoid unnecessary re-configuration of an - * already active BE on such systems. + * already active BE on such systems and ensures the BE DAI + * widget is powered ON after hw_params() BE DAI callback. */ if (fe->card->component_chaining && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_NEW) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN) && + (be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state != SND_SOC_DPCM_STATE_CLOSE)) continue; diff --git a/sound/soc/sof/amd/pci-acp70.c b/sound/soc/sof/amd/pci-acp70.c index 8fa1170a2161e9..9108f1139ff2dc 100644 --- a/sound/soc/sof/amd/pci-acp70.c +++ b/sound/soc/sof/amd/pci-acp70.c @@ -33,6 +33,7 @@ static const struct sof_amd_acp_desc acp70_chip_info = { .ext_intr_cntl = ACP70_EXTERNAL_INTR_CNTL, .ext_intr_stat = ACP70_EXT_INTR_STAT, .ext_intr_stat1 = ACP70_EXT_INTR_STAT1, + .acp_error_stat = ACP70_ERROR_STATUS, .dsp_intr_base = ACP70_DSP_SW_INTR_BASE, .acp_sw0_i2s_err_reason = ACP7X_SW0_I2S_ERROR_REASON, .sram_pte_offset = ACP70_SRAM_PTE_OFFSET, diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c index b1be03011d7e15..6492e1cefbfb60 100644 --- a/sound/soc/sof/intel/hda-bus.c +++ b/sound/soc/sof/intel/hda-bus.c @@ -76,7 +76,7 @@ void sof_hda_bus_init(struct snd_sof_dev *sdev, struct device *dev) snd_hdac_ext_bus_init(bus, dev, &bus_core_ops, sof_hda_ext_ops); - if (chip && chip->hw_ip_version == 
SOF_INTEL_ACE_2_0) + if (chip && chip->hw_ip_version >= SOF_INTEL_ACE_2_0) bus->use_pio_for_commands = true; #else snd_hdac_ext_bus_init(bus, dev, NULL, NULL); diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index b34e5fdf10f16f..6a3932d90b43a9 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1049,7 +1049,21 @@ static void hda_generic_machine_select(struct snd_sof_dev *sdev, if (!*mach && codec_num <= 2) { bool tplg_fixup = false; - hda_mach = snd_soc_acpi_intel_hda_machines; + /* + * make a local copy of the match array since we might + * be modifying it + */ + hda_mach = devm_kmemdup_array(sdev->dev, + snd_soc_acpi_intel_hda_machines, + 2, /* we have one entry + sentinel in the array */ + sizeof(snd_soc_acpi_intel_hda_machines[0]), + GFP_KERNEL); + if (!hda_mach) { + dev_err(bus->dev, + "%s: failed to duplicate the HDA match table\n", + __func__); + return; + } dev_info(bus->dev, "using HDA machine driver %s now\n", hda_mach->drv_name); diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c index 576f407cd456af..976a4794d61000 100644 --- a/sound/soc/sof/ipc4-control.c +++ b/sound/soc/sof/ipc4-control.c @@ -531,6 +531,14 @@ static int sof_ipc4_bytes_ext_put(struct snd_sof_control *scontrol, return -EINVAL; } + /* Check header id */ + if (header.numid != SOF_CTRL_CMD_BINARY) { + dev_err_ratelimited(scomp->dev, + "Incorrect numid for bytes put %d\n", + header.numid); + return -EINVAL; + } + /* Verify the ABI header first */ if (copy_from_user(&abi_hdr, tlvd->tlv, sizeof(abi_hdr))) return -EFAULT; @@ -613,7 +621,8 @@ static int _sof_ipc4_bytes_ext_get(struct snd_sof_control *scontrol, if (data_size > size) return -ENOSPC; - header.numid = scontrol->comp_id; + /* Set header id and length */ + header.numid = SOF_CTRL_CMD_BINARY; header.length = data_size; if (copy_to_user(tlvd, &header, sizeof(struct snd_ctl_tlv))) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 
1a2841899ff5a9..8eee3e1aadf932 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -784,7 +784,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm /* allocate memory for max number of pipeline IDs */ pipeline_list->pipelines = kcalloc(ipc4_data->max_num_pipelines, - sizeof(struct snd_sof_widget *), GFP_KERNEL); + sizeof(*pipeline_list->pipelines), + GFP_KERNEL); if (!pipeline_list->pipelines) { sof_ipc4_pcm_free(sdev, spcm); return -ENOMEM; @@ -798,7 +799,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm spcm->stream[stream].private = stream_priv; - if (!support_info) + /* Delay reporting is only supported on playback */ + if (!support_info || stream == SNDRV_PCM_STREAM_CAPTURE) continue; time_info = kzalloc(sizeof(*time_info), GFP_KERNEL); diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index dc9cb832406783..14aa8ecc4bc426 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1063,7 +1063,7 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp, struct snd_sof_dai *dai) { struct snd_soc_card *card = scomp->card; - struct snd_soc_pcm_runtime *rtd; + struct snd_soc_pcm_runtime *rtd, *full, *partial; struct snd_soc_dai *cpu_dai; int stream; int i; @@ -1080,12 +1080,22 @@ static int sof_connect_dai_widget(struct snd_soc_component *scomp, else goto end; + full = NULL; + partial = NULL; list_for_each_entry(rtd, &card->rtd_list, list) { /* does stream match DAI link ? */ - if (!rtd->dai_link->stream_name || - !strstr(rtd->dai_link->stream_name, w->sname)) - continue; + if (rtd->dai_link->stream_name) { + if (!strcmp(rtd->dai_link->stream_name, w->sname)) { + full = rtd; + break; + } else if (strstr(rtd->dai_link->stream_name, w->sname)) { + partial = rtd; + } + } + } + rtd = full ? 
full : partial; + if (rtd) { for_each_rtd_cpu_dais(rtd, i, cpu_dai) { /* * Please create DAI widget in the right order diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index e8c1abf1ae0a70..bf5299ba11c3c9 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -409,11 +409,11 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, unsigned int rate) { struct platform_device *pdev = sai->pdev; - unsigned int sai_ck_rate, sai_ck_max_rate, sai_curr_rate, sai_new_rate; + unsigned int sai_ck_rate, sai_ck_max_rate, sai_ck_min_rate, sai_curr_rate, sai_new_rate; int div, ret; /* - * Set maximum expected kernel clock frequency + * Set minimum and maximum expected kernel clock frequency * - mclk on or spdif: * f_sai_ck = MCKDIV * mclk-fs * fs * Here typical 256 ratio is assumed for mclk-fs @@ -423,13 +423,16 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * Set constraint MCKDIV * FRL <= 256, to ensure MCKDIV is in available range * f_sai_ck = sai_ck_max_rate * pow_of_two(FRL) / 256 */ + sai_ck_min_rate = rate * 256; if (!(rate % SAI_RATE_11K)) sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_11K * 256; else sai_ck_max_rate = SAI_MAX_SAMPLE_RATE_8K * 256; - if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) + if (!sai->sai_mclk && !STM_SAI_PROTOCOL_IS_SPDIF(sai)) { + sai_ck_min_rate = rate * sai->fs_length; sai_ck_max_rate /= DIV_ROUND_CLOSEST(256, roundup_pow_of_two(sai->fs_length)); + } /* * Request exclusivity, as the clock is shared by SAI sub-blocks and by @@ -444,7 +447,10 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, * return immediately. 
*/ sai_curr_rate = clk_get_rate(sai->sai_ck); - if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate)) + dev_dbg(&pdev->dev, "kernel clock rate: min [%u], max [%u], current [%u]", + sai_ck_min_rate, sai_ck_max_rate, sai_curr_rate); + if (stm32_sai_rate_accurate(sai_ck_max_rate, sai_curr_rate) && + sai_curr_rate >= sai_ck_min_rate) return 0; /* @@ -472,7 +478,7 @@ static int stm32_sai_set_parent_rate(struct stm32_sai_sub_data *sai, /* Try a lower frequency */ div++; sai_ck_rate = sai_ck_max_rate / div; - } while (sai_ck_rate > rate); + } while (sai_ck_rate >= sai_ck_min_rate); /* No accurate rate found */ dev_err(&pdev->dev, "Failed to find an accurate rate"); diff --git a/sound/soc/ti/omap-hdmi.c b/sound/soc/ti/omap-hdmi.c index cf43ac19c4a6d0..55e7cb96858fca 100644 --- a/sound/soc/ti/omap-hdmi.c +++ b/sound/soc/ti/omap-hdmi.c @@ -361,17 +361,20 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) if (!card->dai_link) return -ENOMEM; - compnent = devm_kzalloc(dev, sizeof(*compnent), GFP_KERNEL); + compnent = devm_kzalloc(dev, 2 * sizeof(*compnent), GFP_KERNEL); if (!compnent) return -ENOMEM; - card->dai_link->cpus = compnent; + card->dai_link->cpus = &compnent[0]; card->dai_link->num_cpus = 1; card->dai_link->codecs = &snd_soc_dummy_dlc; card->dai_link->num_codecs = 1; + card->dai_link->platforms = &compnent[1]; + card->dai_link->num_platforms = 1; card->dai_link->name = card->name; card->dai_link->stream_name = card->name; card->dai_link->cpus->dai_name = dev_name(ad->dssdev); + card->dai_link->platforms->name = dev_name(ad->dssdev); card->num_links = 1; card->dev = dev; diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index a29f28eb7d0c64..f36ec98da4601d 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -926,6 +926,8 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, { int altset = set ? 
ep->altsetting : 0; int err; + int retries = 0; + const int max_retries = 5; if (ep->iface_ref->altset == altset) return 0; @@ -935,8 +937,13 @@ static int endpoint_set_interface(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Setting usb interface %d:%d for EP 0x%x\n", ep->iface, altset, ep->ep_num); +retry: err = usb_set_interface(chip->dev, ep->iface, altset); if (err < 0) { + if (err == -EPROTO && ++retries <= max_retries) { + msleep(5 * (1 << (retries - 1))); + goto retry; + } usb_audio_err_ratelimited( chip, "%d:%d: usb_set_interface failed (%d)\n", ep->iface, altset, err); diff --git a/sound/usb/format.c b/sound/usb/format.c index 9d32b21a5fbb02..0ba4641a0eb11d 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -260,7 +260,8 @@ static int parse_audio_format_rates_v1(struct snd_usb_audio *chip, struct audiof } /* Jabra Evolve 65 headset */ - if (chip->usb_id == USB_ID(0x0b0e, 0x030b)) { + if (chip->usb_id == USB_ID(0x0b0e, 0x030b) || + chip->usb_id == USB_ID(0x0b0e, 0x030c)) { /* only 48kHz for playback while keeping 16kHz for capture */ if (fp->nr_rates != 1) return set_fixed_rate(fp, 48000, SNDRV_PCM_RATE_48000); diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 4727043fd74580..77f06da93151e8 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -57,6 +57,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x31e9, 0x0002, 0x81, 2), /* Solid State Logic SSL2+ */ IMPLICIT_FB_FIXED_DEV(0x0499, 0x172f, 0x81, 2), /* Steinberg UR22C */ IMPLICIT_FB_FIXED_DEV(0x0d9a, 0x00df, 0x81, 2), /* RTX6001 */ + IMPLICIT_FB_FIXED_DEV(0x19f7, 0x000a, 0x84, 3), /* RODE AI-1 */ IMPLICIT_FB_FIXED_DEV(0x22f0, 0x0006, 0x81, 3), /* Allen&Heath Qu-16 */ IMPLICIT_FB_FIXED_DEV(0x1686, 0xf029, 0x82, 2), /* Zoom UAC-2 */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8003, 0x86, 2), /* Fractal Audio Axe-Fx II */ diff --git a/sound/usb/midi.c b/sound/usb/midi.c index dcdd7e9e1ae974..c3de2b13743500 100644 --- 
a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1530,6 +1530,7 @@ static void snd_usbmidi_free(struct snd_usb_midi *umidi) snd_usbmidi_in_endpoint_delete(ep->in); } mutex_destroy(&umidi->mutex); + timer_shutdown_sync(&umidi->error_timer); kfree(umidi); } @@ -1553,7 +1554,7 @@ void snd_usbmidi_disconnect(struct list_head *p) spin_unlock_irq(&umidi->disc_lock); up_write(&umidi->disc_rwsem); - timer_delete_sync(&umidi->error_timer); + timer_shutdown_sync(&umidi->error_timer); for (i = 0; i < MIDI_MAX_ENDPOINTS; ++i) { struct snd_usb_midi_endpoint *ep = &umidi->endpoints[i]; @@ -1885,10 +1886,18 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, } port_info = find_port_info(umidi, number); - name_format = port_info ? port_info->name : - (jack_name != default_jack_name ? "%s %s" : "%s %s %d"); - snprintf(substream->name, sizeof(substream->name), - name_format, umidi->card->shortname, jack_name, number + 1); + if (port_info || jack_name == default_jack_name || + strncmp(umidi->card->shortname, jack_name, strlen(umidi->card->shortname)) != 0) { + name_format = port_info ? port_info->name : + (jack_name != default_jack_name ? 
"%s %s" : "%s %s %d"); + snprintf(substream->name, sizeof(substream->name), + name_format, umidi->card->shortname, jack_name, number + 1); + } else { + /* The manufacturer included the iProduct name in the jack + * name, do not use both + */ + strscpy(substream->name, jack_name); + } *rsubstream = substream; } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 9112313a9dbc00..dbbc9eb935a4b3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2242,6 +2242,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0c45, 0x636b, /* Microdia JP001 USB Camera */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */ QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ @@ -2250,6 +2252,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_MIC_RES_16), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18fd9..af9d9acaf9975a 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define 
KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe00..df36f23876e863 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include +#include diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d3f..6c2c152d8a67b9 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 
0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define 
X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -470,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -521,4 +532,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e45..e6134ef2263d50 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* 
last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK 
GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416fa..460306b35a4bfe 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd5783..ec1321248dac2a 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, 
"write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f9a..40a9e59c2fd568 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include +#include +#include #include +#include #include #include -#include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) @@ -145,14 +146,14 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -211,7 +212,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range = malloc(sizeof(struct cpuid_range)); if (!range) - perror("malloc range"); + err(EXIT_FAILURE, NULL); if (input_eax & 0x80000000) range->is_ext = true; @@ -220,7 +221,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); @@ -395,8 +396,8 @@ static int parse_line(char *line) return 0; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); return -1; } @@ -418,10 +419,8 @@ static void parse_text(void) file = 
fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -530,7 +529,7 @@ static inline struct cpuid_func *index_to_func(u32 index) func_idx = index & 0xffff; if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + warnx("Invalid input index (0x%x)", index); return NULL; } return &range->funcs[func_idx]; @@ -562,7 +561,7 @@ static void show_info(void) return; } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + warnx("Invalid input subleaf (0x%x)", user_sub); } show_func(func); @@ -593,15 +592,15 @@ static void setup_platform_cpuid(void) static void usage(void) { - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + warnx("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -652,7 +651,7 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); + warnx("Invalid option '%c'", optopt); return -1; } diff --git a/tools/arch/x86/lib/memset_64.S 
b/tools/arch/x86/lib/memset_64.S index 0199d56cb479d8..d66b710d628f88 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688fc..cd3fd5155f6ece 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. 
-e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT 
Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 93b139bfb9880a..3f1d6be512151d 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd) for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0) + if (count < 0 && errno != EINVAL) return -1; if (count > 0) { diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c64..ce1b556dfa90f1 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 1f44ca677ad3d6..57bd995ce6afa3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -87,7 +87,6 @@ FEATURE_TESTS_BASIC := \ libtracefs \ libcpupower \ libcrypto \ - libunwind \ pthread-attr-setaffinity-np \ pthread-barrier \ reallocarray \ @@ -148,15 +147,12 @@ endif FEATURE_DISPLAY ?= \ libdw \ glibc \ - libbfd \ - libbfd-buildid \ libelf \ libnuma \ numa_num_possible_cpus \ libperl \ libpython \ libcrypto \ - libunwind \ libcapstone \ llvm-perf \ zlib \ diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9de..b9ce3aab15fea7 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool 
kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. */ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. 
*/ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 00000000000000..6b87136757655c --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_ type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. 
+ */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86a2..ef1c27fa3c570f 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca38..2892a45023af6d 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae677849f..fe5df2a9fe8ee6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2051,7 +2051,8 @@ union bpf_attr { * untouched 
(unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -4968,6 +4969,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -6065,6 +6069,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. 
*/ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d99f..ced0fc3c3aa534 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e73..b6ae8ad8934b52 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3d5..5fc753c23734df 100644 --- a/tools/include/uapi/linux/perf_event.h +++ 
b/tools/include/uapi/linux/perf_event.h @@ -385,6 +385,8 @@ enum perf_event_read_format { * * @sample_max_stack: Max number of frame pointers in a callchain, * should be < /proc/sys/kernel/perf_event_max_stack + * Max number of entries of branch stack + * should be < hardware limit */ struct perf_event_attr { diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a2528644168..f78ee3670dd5d7 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* 
Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define 
STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf966077..b997c68bd94536 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b85060f07b3b4..147964bb64c8f4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) 
old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; @@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -1725,15 +1727,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. - */ -static int sys_memfd_create(const char *name, unsigned flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif @@ -11121,16 +11114,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -11255,9 +11248,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 
1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11313,10 +11306,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11860,20 +11853,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -12297,13 +12276,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? 
: binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -13371,7 +13351,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 76669c73dcd162..477a3b3389a091 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags) return syscall(__NR_dup3, oldfd, newfd, flags); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. 
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 800e0ef09c3787..a469e5d4fee70e 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); - fd = memfd_create(filename, 0); + fd = sys_memfd_create(filename, 0); if (fd < 0) { ret = -errno; pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); @@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3bfe..06663f9ea581f9 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", 
type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c45181..7fbb50b74c00b3 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -120,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d6f..c4da34048ef858 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); diff --git a/tools/net/ynl/pyynl/ethtool.py 
b/tools/net/ynl/pyynl/ethtool.py index af7fddd7b085bf..cab6b576c8762e 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -338,16 +338,24 @@ def main(): print('Capabilities:') [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] - print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') + print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') - print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + if 'tx-types' in tsinfo: + print('Hardware Transmit Timestamp Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + else: + print('Hardware Transmit Timestamp Modes: none') + + if 'rx-filters' in tsinfo: + print('Hardware Receive Filter Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + else: + print('Hardware Receive Filter Modes: none') - print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + if 'stats' in tsinfo and tsinfo['stats']: + print('Statistics:') + [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] - print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return print(f'Settings for {args.device}:') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a1427c53703025..a7f08edbc23598 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -162,9 +162,15 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return False - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): if self.is_multi_val() or self.presence_type() == 'len': - ri.cw.p(f'free({var}->{ref}{self.c_name});') + return [f'free({var}->{ref}{self.c_name});'] + return [] + + def free(self, ri, var, ref): + lines = self._free_lines(ri, var, ref) + for line in lines: + ri.cw.p(line) def arg_member(self, ri): member = self._complex_member_type(ri) @@ -263,6 +269,10 @@ def setter(self, 
ri, space, direction, deref=False, ref=None): var = "req" member = f"{var}->{'.'.join(ref)}" + local_vars = [] + if self.free_needs_iter(): + local_vars += ['unsigned int i;'] + code = [] presence = '' for i in range(0, len(ref)): @@ -272,6 +282,10 @@ def setter(self, ri, space, direction, deref=False, ref=None): if i == len(ref) - 1 and self.presence_type() != 'bit': continue code.append(presence + ' = 1;') + ref_path = '.'.join(ref[:-1]) + if ref_path: + ref_path += '.' + code += self._free_lines(ri, var, ref_path) code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" @@ -279,7 +293,8 @@ def setter(self, ri, space, direction, deref=False, ref=None): alloc = bool([x for x in code if 'alloc(' in x]) if free and not alloc: func_name = '__' + func_name - ri.cw.write_func('static inline void', func_name, body=code, + ri.cw.write_func('static inline void', func_name, local_vars=local_vars, + body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -482,8 +497,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = strlen({self.c_name});", + return [f"{presence}_len = strlen({self.c_name});", f"{member} = malloc({presence}_len + 1);", f'memcpy({member}, {self.c_name}, {presence}_len);', f'{member}[{presence}_len] = 0;'] @@ -536,8 +550,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = len;", + return [f"{presence}_len = len;", f"{member} = malloc({presence}_len);", f'memcpy({member}, {self.c_name}, {presence}_len);'] @@ -574,12 +587,14 @@ def is_recursive(self): def _complex_member_type(self, ri): return self.nested_struct_type - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] at = '&' if self.is_recursive_for_op(ri): at = '' - 
ri.cw.p(f'if ({var}->{ref}{self.c_name})') - ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') + lines += [f'if ({var}->{ref}{self.c_name})'] + lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});'] + return lines def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -632,15 +647,19 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return 'type' not in self.attr or self.attr['type'] == 'nest' - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] if self.attr['type'] in scalars: - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [f"free({var}->{ref}{self.c_name});"] elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [ + f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + f"free({var}->{ref}{self.c_name});", + ] else: raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") + return lines def _attr_policy(self, policy): return self.base_type._attr_policy(policy) @@ -654,10 +673,10 @@ def _attr_get(self, ri, var): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: @@ 
-666,8 +685,7 @@ def attr_put(self, ri, var): def _setter_lines(self, ri, member, presence): # For multi-attr we have a count, not presence, hack up the presence presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name - return [f"free({member});", - f"{member} = {self.c_name};", + return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -696,8 +714,11 @@ def _attr_typol(self): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'ynl_attr_for_each_nested(attr2, attr)', - f'\t{var}->n_{self.c_name}++;'] + 'ynl_attr_for_each_nested(attr2, attr) {', + '\tif (ynl_attr_validate(yarg, attr2))', + '\t\treturn YNL_PARSE_CB_ERROR;', + f'\t{var}->n_{self.c_name}++;', + '}'] return get_lines, None, local_vars @@ -755,6 +776,7 @@ def __init__(self, family, space_name, type_list=None, inherited=None): self.request = False self.reply = False self.recursive = False + self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] self.attrs = dict() @@ -1121,6 +1143,9 @@ def _load_nested_sets(self): self.pure_nested_structs[nested].request = True if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True self._sort_pure_types() @@ -1136,6 +1161,8 @@ def _load_nested_sets(self): struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if spec.is_multi_val(): + child.in_multi_val = True if attr_set in struct.child_nests: struct.recursive = True @@ -1399,9 +1426,9 @@ def write_func_lvar(self, local_vars): def write_func(self, qual_ret, name, body, args=None, local_vars=None): self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.block_start() self.write_func_lvar(local_vars=local_vars) - self.block_start() for line in body: self.p(line) self.block_end() @@ -1644,11 
+1671,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'): def put_req_nested(ri, struct): + local_vars = [] + init_lines = [] + + local_vars.append('struct nlattr *nest;') + init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + if arg.presence_type() == 'count': + local_vars.append('unsigned int i;') + break + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() - ri.cw.write_func_lvar('struct nlattr *nest;') + ri.cw.write_func_lvar(local_vars) - ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") + for line in init_lines: + ri.cw.p(line) for _, arg in struct.member_list(): arg.attr_put(ri, "obj") @@ -1850,6 +1889,11 @@ def print_req(ri): local_vars += ['size_t hdr_len;', 'void *hdr;'] + for _, attr in ri.struct["request"].member_list(): + if attr.presence_type() == 'count': + local_vars += ['unsigned int i;'] + break + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) @@ -2941,6 +2985,9 @@ def main(): for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) + if struct.request and struct.in_multi_val: + free_rsp_nested_prototype(ri) + cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 33d861c04ebd8f..687c5eafb49a7e 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op2 = ins.opcode.bytes[1]; op3 = ins.opcode.bytes[2]; + /* + * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long. 
+ */ + if (op1 == 0xea) { + insn->len = 1; + insn->type = INSN_BUG; + return 0; + } + if (ins.rex_prefix.nbytes) { rex = ins.rex_prefix.bytes[0]; rex_w = X86_REX_W(rex) >> 3; @@ -522,7 +531,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case INAT_PFX_REPNE: if (modrm == 0xca) /* eretu/erets */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; default: if (modrm == 0xca) @@ -535,11 +544,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec insn->type = INSN_JUMP_CONDITIONAL; - } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || - op2 == 0x35) { + } else if (op2 == 0x05 || op2 == 0x34) { + + /* syscall, sysenter */ + insn->type = INSN_SYSCALL; + + } else if (op2 == 0x07 || op2 == 0x35) { - /* sysenter, sysret */ - insn->type = INSN_CONTEXT_SWITCH; + /* sysret, sysexit */ + insn->type = INSN_SYSRET; } else if (op2 == 0x0b || op2 == 0xb9) { @@ -676,7 +689,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case 0xca: /* retf */ case 0xcb: /* retf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; case 0xe0: /* loopne */ @@ -721,7 +734,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } else if (modrm_reg == 5) { /* jmpf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; } else if (modrm_reg == 6) { diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 403e587676f1b1..06ca4a2659a45f 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -126,7 +126,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, * indicates a rare GCC quirk/bug which can leave dead * code behind. 
*/ - if (reloc_type(text_reloc) == R_X86_64_PC32) { + if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) { WARN_INSN(insn, "ignoring unreachables due to jump table quirk"); file->ignore_unreachables = true; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3b6..f23bdda737aaa5 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -225,10 +225,13 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || - (strstr(func->name, "_4core5slice5index24slice_") && + (strstr(func->name, "_4core5slice5index") && + strstr(func->name, "slice_") && str_ends_with(func->name, "_fail")); } @@ -3505,6 +3508,34 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file, return next_insn_same_sec(file, alt_group->orig_group->last_insn); } +static bool skip_alt_group(struct instruction *insn) +{ + struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + + /* ANNOTATE_IGNORE_ALTERNATIVE */ + if (insn->alt_group && insn->alt_group->ignore) + return true; + + /* + * For NOP patched with CLAC/STAC, only follow the latter to avoid + * impossible code paths combining patched CLAC with unpatched STAC + * or vice versa. + * + * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus + * requested not to do that to avoid hurting .s file readability + * around CLAC/STAC alternative sites. 
+ */ + + if (!alt_insn) + return false; + + /* Don't override ASM_{CLAC,STAC}_UNSAFE */ + if (alt_insn->alt_group && alt_insn->alt_group->ignore) + return false; + + return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3625,7 +3656,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group && insn->alt_group->ignore) + if (skip_alt_group(insn)) return 0; if (handle_insn_ops(insn, next_insn, &state)) @@ -3684,14 +3715,20 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; - case INSN_CONTEXT_SWITCH: - if (func) { - if (!next_insn || !next_insn->hint) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; - } - break; + case INSN_SYSCALL: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + + break; + + case INSN_SYSRET: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; } + return 0; case INSN_STAC: @@ -3886,6 +3923,12 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; + case INSN_SYSCALL: + break; + + case INSN_SYSRET: + return 0; + case INSN_NOP: if (insn->retpoline_safe) return 0; @@ -3895,6 +3938,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) break; } + if (insn->dead_end) + return 0; + if (!next) { WARN_INSN(insn, "teh end!"); return 1; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 089a1acc48a8d0..01ef6f415adf64 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -19,7 +19,8 @@ enum insn_type { INSN_CALL, 
INSN_CALL_DYNAMIC, INSN_RETURN, - INSN_CONTEXT_SWITCH, + INSN_SYSCALL, + INSN_SYSRET, INSN_BUG, INSN_NOP, INSN_STAC, diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 364b55b00b4841..34af57b8ec2a9c 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,8 +1,10 @@ COPYING LICENSES/preferred/GPL-2.0 arch/arm64/tools/gen-sysreg.awk +arch/arm64/tools/syscall_64.tbl arch/arm64/tools/sysreg arch/*/include/uapi/asm/bpf_perf_event.h +include/uapi/asm-generic/Kbuild tools/perf tools/arch tools/scripts @@ -25,6 +27,10 @@ tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c scripts/bpf_doc.py +scripts/Kbuild.include +scripts/Makefile.asm-headers +scripts/syscall.tbl +scripts/syscallhdr.sh tools/bpf/bpftool kernel/bpf/disasm.c kernel/bpf/disasm.h diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71f7..d1ea7bf449647e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) @@ -559,6 +560,8 @@ ifndef NO_LIBELF ifeq ($(feature-libdebuginfod), 1) CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT EXTLIBS += -ldebuginfod + else + $(warning No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent) endif endif @@ -624,6 +627,8 @@ endif ifndef NO_LIBUNWIND have_libunwind := + $(call feature_check,libunwind) + $(call feature_check,libunwind-x86) ifeq ($(feature-libunwind-x86), 1) $(call detected,CONFIG_LIBUNWIND_X86) @@ -648,7 +653,7 @@ ifndef NO_LIBUNWIND endif ifneq ($(feature-libunwind), 1) - $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR) + $(warning No libunwind found. 
Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR and set LIBUNWIND=1 in the make command line as it is opt-in now) NO_LOCAL_LIBUNWIND := 1 else have_libunwind := 1 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 979d4691221a07..a7ae5637dadeeb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1147,7 +1147,8 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ - $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.txt '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \ $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' $(Q)$(MAKE) -C tests/shell/coresight install-tests diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd8e..27c1d5ebcd91c8 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda620b..1e8c44c7b61492 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat 
sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef076..9a084bdb892694 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b635b..a4569b96ef06c5 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250ea..52a7652fcff639 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b304..83e45eb6c095a3 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr 
sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e208..ac007ea00979dc 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c791..cfb5ca41e30de1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134eea..f657a77314f866 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ba20bf7c011d77..d56273a0e241c7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3480,7 +3480,7 @@ 
static struct option __record_options[] = { "sample selected machine registers on interrupt," " use '-I?' to list register names", parse_intr_regs), OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", - "sample selected machine registers on interrupt," + "sample selected machine registers in user space," " use '--user-regs=?' to list register names", parse_user_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6ac51925ea4249..33cce59bdfbdb5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1352,7 +1352,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, - { .name = "rseq", .errpid = true, + { .name = "rseq", .arg = { [0] = { .from_user = true /* rseq */, }, }, }, { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, @@ -1376,7 +1376,7 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "sendto", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, [4] = SCA_SOCKADDR_FROM_USER(addr), }, }, - { .name = "set_robust_list", .errpid = true, + { .name = "set_robust_list", .arg = { [0] = { .from_user = true /* head */, }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", @@ -2842,7 +2842,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, e_machine = thread__e_machine(thread, trace->host); sc = trace__syscall_info(trace, evsel, e_machine, id); if (sc == NULL) - return -1; + goto out_put; ttrace = thread__trace(thread, trace); /* * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() @@ -3005,8 +3005,8 @@ errno_print: { else if (sc->fmt->errpid) { struct thread 
*child = machine__find_thread(trace->host, ret, ret); + fprintf(trace->output, "%ld", ret); if (child != NULL) { - fprintf(trace->output, "%ld", ret); if (thread__comm_set(child)) fprintf(trace->output, " (%s)", thread__comm_str(child)); thread__put(child); @@ -4128,10 +4128,13 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (!strcmp(thread__comm_str(parent), "sshd") || strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = thread__tid(parent); + thread__put(parent); break; } + thread__put(thread); thread = parent; } + thread__put(thread); err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9cb0..857f6646cc23e9 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 121cf61ba1b345..e0b2e7268ef68c 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -680,7 +680,10 @@ def FindSelect(self, value, pattern, query): s = value.replace("%", "\\%") s = s.replace("_", "\\_") # Translate * and ? 
into SQL LIKE pattern characters % and _ - trans = string.maketrans("*?", "%_") + if sys.version_info[0] == 3: + trans = str.maketrans("*?", "%_") + else: + trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" else: match = " GLOB '" + str(value) + "'" diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh index 4d4aac547f0109..c2ec7881ec1de4 100644 --- a/tools/perf/tests/shell/lib/stat_output.sh +++ b/tools/perf/tests/shell/lib/stat_output.sh @@ -151,6 +151,11 @@ check_per_socket() check_metric_only() { echo -n "Checking $1 output: metric only " + if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo | grep -qw 67 + then + echo "[Skip] CPU-measurement counter facility not installed" + return + fi perf stat --metric-only $2 -e instructions,cycles true commachecker --metric-only echo "[Success]" diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index a4f257ea839e13..98fb65274ac4f7 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++ b/tools/perf/tests/shell/stat+json_output.sh @@ -176,6 +176,11 @@ check_per_socket() check_metric_only() { echo -n "Checking json output: metric only " + if [ "$(uname -m)" = "s390x" ] && ! grep '^facilities' /proc/cpuinfo | grep -qw 67 + then + echo "[Skip] CPU-measurement counter facility not installed" + return + fi perf stat -j --metric-only -e instructions,cycles -o "${stat_output}" true $PYTHON $pythonchecker --metric-only --file "${stat_output}" echo "[Success]" diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 8df3f9d9ffd2b2..6b3aac283c371c 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -264,7 +264,7 @@ static int compar(const void *a, const void *b) const struct event_node *nodeb = b; s64 cmp = nodea->event_time - nodeb->event_time; - return cmp; + return cmp < 0 ? -1 : (cmp > 0 ? 
1 : 0); } static int process_events(struct evlist *evlist, diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index d18cc47e89bd01..c3322eb3d6865d 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 6e6907e63bfc2b..a15ac2fa4b202f 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 7539717707337a..e762e1af650c4b 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<dso here since * it's just to remove the current filter. - * Ditto for thread below. 
*/ do_zoom_dso(browser, actions); } else if (top == &browser->hists->thread_filter) { + actions->thread = thread; do_zoom_thread(browser, actions); } else if (top == &browser->hists->socket_filter) { do_zoom_socket(browser, actions); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1974395492d7da..3c030da2e477c7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2566,25 +2566,6 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). - */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2730,9 +2711,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4e8a9b172fbcc7..9b1011fe482671 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; 
u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -2272,7 +2274,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + 
+/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Miss */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP None */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote RAM hit|SNP None */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* Remote RAM hit|SNP Fwd */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP HitM */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; 
+ + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample; @@ -2393,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & 
PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2413,9 +2566,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2440,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -3407,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. 
Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3427,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3976,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2531b373f2cf7c..b048165b10c141 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1976,7 +1976,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2078,7 +2078,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, 
symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2106,6 +2106,8 @@ static int add_callchain_ip(struct thread *thread, } if (symbols) thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b7ebac5ab1d112..e2e3969e12d360 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2052,6 +2052,9 @@ static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_mat for (size_t i = 0; i < ARRAY_SIZE(names); i++) { const char *name = names[i]; + if (!name) + continue; + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) return true; if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893f3..36c1d3090689fc 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; - bool need_swap = false; + bool need_swap = false, elf32; u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; int i; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr; + size_t phdr_size; + void *buf = NULL; + size_t buf_size = 0; fp = fopen(filename, "r"); if (fp == NULL) @@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out; need_swap = check_need_swap(e_ident[EI_DATA]); + elf32 = e_ident[EI_CLASS] == ELFCLASS32; - /* for simplicity */ - fseek(fp, 0, SEEK_SET); - - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, + elf32 ? 
sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), + 1, fp) != 1) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum + : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (fread(phdr, phdr_size, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (i = 0; i < elf32 ? 
hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { + size_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } + fseek(fp, elf32 ? 
hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (fread(buf, p_filesz, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: fclose(fp); return ret; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 89585f53c1d5cc..10a01f8fbd4000 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -410,7 +410,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -421,7 +421,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cd574a896418ac..2b90bbed7a6121 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -126,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 97b327d1ce4a01..727a10e3f99001 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -486,8 +486,14 @@ int evsel__tool_pmu_read(struct evsel *evsel, int 
cpu_map_idx, int thread) delta_start *= 1000000000 / ticks_per_sec; } count->val = delta_start; - count->ena = count->run = delta_start; count->lost = 0; + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + count->ena++; + count->run++; return 0; } diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9fb2c1343c7fe2..0b037e7389a009 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, * has to be pointed by symsrc_filename */ if (ofs == 0) { - if (dso__data_get_fd(dso, machine, &fd) { + if (dso__data_get_fd(dso, machine, &fd)) { ofs = elf_section_offset(fd, ".debug_frame"); dso__data_put_fd(dso); } diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 9d70d8c945af4e..987a5d32f3b600 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl") typedef struct osl_table_info { struct osl_table_info *next; u32 instance; - char signature[ACPI_NAMESEG_SIZE]; + char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING; } osl_table_info; diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 13817f9112c06a..9fc927fcc22a7f 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname) int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) { - char filename[ACPI_NAMESEG_SIZE + 16]; + char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING; char instance_str[16]; ACPI_FILE file; acpi_size actual; diff --git 
a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0170d3cc68194c..ab79854cb296e4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4766,6 +4766,38 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) return (value & value_mask) >> value_shift; } + +/* Rapl domain enumeration helpers */ +static inline int get_rapl_num_domains(void) +{ + int num_packages = topo.max_package_id + 1; + int num_cores_per_package; + int num_cores; + + if (!platform->has_per_core_rapl) + return num_packages; + + num_cores_per_package = topo.max_core_id + 1; + num_cores = num_cores_per_package * num_packages; + + return num_cores; +} + +static inline int get_rapl_domain_id(int cpu) +{ + int nr_cores_per_package = topo.max_core_id + 1; + int rapl_core_id; + + if (!platform->has_per_core_rapl) + return cpus[cpu].physical_package_id; + + /* Compute the system-wide unique core-id for @cpu */ + rapl_core_id = cpus[cpu].physical_core_id; + rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package; + + return rapl_core_id; +} + /* * get_counters(...) * migrate to cpu @@ -4821,7 +4853,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) goto done; if (platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, c->core_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4887,7 +4919,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->sys_lpi = cpuidle_cur_sys_lpi_us; if (!platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, p->package_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -7863,7 +7895,7 @@ void linux_perf_init(void) void rapl_perf_init(void) { - const unsigned int num_domains = (platform->has_per_core_rapl ? 
topo.max_core_id : topo.max_package_id) + 1; + const unsigned int num_domains = get_rapl_num_domains(); bool *domain_visited = calloc(num_domains, sizeof(bool)); rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); @@ -7904,8 +7936,7 @@ void rapl_perf_init(void) continue; /* Skip already seen and handled RAPL domains */ - next_domain = - platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + next_domain = get_rapl_domain_id(cpu); assert(next_domain < num_domains); diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad593..fdc7170639e604 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index ebbdb3c42e9f74..580b4e246aecd5 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe7f..bf9caa908f894c 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646ae..422e186cf3cf1c 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ 
b/tools/testing/kunit/configs/all_tests.config @@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -43,6 +44,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y @@ -51,3 +54,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a23..c176487356e6c9 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd8736d..bbba921e0eacb1 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ def test_parse_late_test_plan(self): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. 
- self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py index 78a474a5b95f3a..f00cb89fdef6aa 100644 --- a/tools/testing/kunit/qemu_configs/sh.py +++ b/tools/testing/kunit/qemu_configs/sh.py @@ -7,7 +7,9 @@ CONFIG_MEMORY_START=0x0c000000 CONFIG_SH_RTS7751R2D=y CONFIG_RTS7751R2D_PLUS=y -CONFIG_SERIAL_SH_SCI=y''', +CONFIG_SERIAL_SH_SCI=y +CONFIG_CMDLINE_EXTEND=y +''', qemu_arch='sh4', kernel_path='arch/sh/boot/zImage', kernel_command_line='console=ttySC1', diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 256d9573b44646..2019550a1b692e 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,8 @@ QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_KUNIT_FAULT_TEST=n +CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y ''', diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 1cf82acb2a3e4d..0ab4b53bb4f32a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size) { } +static inline unsigned long free_reserved_area(void *start, void *end, + int poison, const char *s) +{ + return 0; +} + #endif diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h new file mode 100644 index 00000000000000..ae3f497165d6a8 --- /dev/null +++ b/tools/testing/memblock/linux/mutex.h @@ -0,0 
+1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MUTEX_H +#define _MUTEX_H + +#define DEFINE_MUTEX(name) int name + +static inline void dummy_mutex_guard(int *name) +{ +} + +#define guard(mutex) \ + dummy_##mutex##_guard + +#endif /* _MUTEX_H */ \ No newline at end of file diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0ad..01e836fba48884 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally Split range to node 0 and 1*/ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 61167ad5fecd ("mm: pass nid to 
reserve_bootmem_region()") + * do following process to set nid to each memblock.reserved region. + * But it may miss some region if memblock_set_node() double the + * array. + * + * By checking 'max', we make sure all region nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. 
+ */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index cb24124ac5b98e..674aaa02e39664 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -4,7 +4,6 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio kselftest_install/ -tpm2/SpaceTest.log # Python bytecode and cache __pycache__/ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c77c8c8e3d9bdd..9c477321a5b474 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -121,6 +121,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or @@ -201,7 +202,7 @@ export KHDR_INCLUDES all: @ret=1; \ - for TARGET in $(TARGETS); do \ + for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 4930e03a7b9903..762048eb354ffe 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -891,18 +891,11 @@ static 
void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); - bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; - if (sme_supported() && - (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) - sm_change = true; - else - sm_change = false; - fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -953,12 +946,7 @@ static void set_initial_values(struct test_config *config) if (fpmr_supported()) { fill_random(&fpmr_in, sizeof(fpmr_in)); fpmr_in &= FPMR_SAFE_BITS; - - /* Entering or exiting streaming mode clears FPMR */ - if (sm_change) - fpmr_expected = 0; - else - fpmr_expected = fpmr_in; + fpmr_expected = fpmr_in; } else { fpmr_in = 0; fpmr_expected = 0; diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5680befae8c6d1..5e713ef7caa307 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dbd13f8e42a7aa..dd6512fa652be0 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 
09f6487f58b9cb..5fea3209566ed5 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ static void test_multi_maps(void) for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 8e13a3416a21d2..1de14b111931aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -104,7 +104,7 @@ void test_kmem_cache_iter(void) goto destroy; memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, 
sizeof(buf) > 0)) { + while (read(iter_fd, buf, sizeof(buf)) > 0) { /* Read out all contents */ printf("%s", buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 115287ba441bfd..0703e987df8997 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg) while (!READ_ONCE(skip)) { err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); + if (err || topts.retval) { + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + break; + } } pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862ac..0a99fd404f6dc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f546583..863df7c0fdd027 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index 00000000000000..82307166f78925 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" 
+#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c index b33385dfbd3503..22c4fb8b9266da 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx) r = bpf_res_spin_lock(&elem1->lock); if (r) return r; - if (!bpf_res_spin_lock(&elem2->lock)) { + r = bpf_res_spin_lock(&elem2->lock); + if (!r) { bpf_res_spin_unlock(&elem2->lock); bpf_res_spin_unlock(&elem1->lock); return -1; } bpf_res_spin_unlock(&elem1->lock); - return 0; + return r != -EDEADLK; } SEC("tc") @@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx) /* Trigger AA, after exhausting entries in the held lock table. This * time, only the timeout can save us, as AA detection won't succeed. */ - if (!bpf_res_spin_lock(locks[34])) { + ret = bpf_res_spin_lock(locks[34]); + if (!ret) { bpf_res_spin_unlock(locks[34]); ret = 1; goto end; } + ret = ret != -ETIMEDOUT ? 
2 : 0; + end: for (i = i - 1; i >= 0; i--) bpf_res_spin_unlock(locks[i]); diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c index 77698d5a19e446..a696ab84bfd662 100644 --- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -10,65 +10,81 @@ SEC("socket") __description("load-acquire, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void load_acquire_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" "*(u8 *)(r10 - 1) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1)) : __clobber_all); } SEC("socket") __description("load-acquire, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void load_acquire_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" "*(u16 *)(r10 - 2) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2)) : __clobber_all); } SEC("socket") __description("load-acquire, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void load_acquire_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" "*(u32 *)(r10 - 4) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + ".8byte %[load_acquire_insn];" // w2 = 
load_acquire((u32 *)(r10 - 4)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4)) : __clobber_all); } SEC("socket") __description("load-acquire, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void load_acquire_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" "*(u64 *)(r10 - 8) = r1;" - ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c index c0442d5bb049d8..022d03d9835957 100644 --- a/tools/testing/selftests/bpf/progs/verifier_store_release.c +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -11,13 +11,17 @@ SEC("socket") __description("store-release, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void store_release_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); - "w0 = *(u8 *)(r10 - 1);" + "w2 = *(u8 *)(r10 - 1);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -27,13 +31,17 @@ __naked void store_release_8(void) SEC("socket") __description("store-release, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void store_release_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" ".8byte %[store_release_insn];" // store_release((u16 
*)(r10 - 2), w1); - "w0 = *(u16 *)(r10 - 2);" + "w2 = *(u16 *)(r10 - 2);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -43,13 +51,17 @@ __naked void store_release_16(void) SEC("socket") __description("store-release, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void store_release_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); - "w0 = *(u32 *)(r10 - 4);" + "w2 = *(u32 *)(r10 - 4);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -59,13 +71,17 @@ __naked void store_release_32(void) SEC("socket") __description("store-release, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void store_release_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); - "r0 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 8);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 49f2fc61061f5d..9551d8d5f8f9f8 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester, emit_verifier_log(tester->log_buf, false /*force*/); validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log); + /* Restore capabilities because the kernel will silently ignore requests + * for program info (such as xlated program text) if we are not + * bpf-capable. Also, for some reason test_verifier executes programs + * with all capabilities restored. Do the same here. 
+ */ + if (restore_capabilities(&caps)) + goto tobj_cleanup; + if (subspec->expect_xlated.cnt) { err = get_xlated_program_text(bpf_program__fd(tprog), tester->log_buf, tester->log_buf_sz); @@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester, } if (should_do_test_run(spec, subspec)) { - /* For some reason test_verifier executes programs - * with all capabilities restored. Do the same here. - */ - if (restore_capabilities(&caps)) - goto tobj_cleanup; - /* Do bpf_map__attach_struct_ops() for each struct_ops map. * This should trigger bpf_struct_ops->reg callback on kernel side. */ diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d212e..a17256d9f88a87 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. 
# BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +183,22 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P = set cpus.partition (0:member, 1:root, 2:isolated) # C = add cpu-list to cpuset.cpus # X = add cpu-list to cpuset.cpus.exclusive # S

= use prefix in subtree_control # T = put a task into cgroup +# CX = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O= = Write to CPU online file of # # ECPUs - effective CPUs of cpusets @@ -207,130 +225,129 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . 
O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 
0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . 
P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . 
P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1 ." " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 
0 A1:0-3|A2:1-3|XA2: A2:P-2 ." " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2 ." 
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +356,127 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . 
C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 
0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" + + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" +) + +# +# Cpuset controller remote partition test matrix. 
+# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4:S+ . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4:S+ . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4:S+ X5-6:P1:S+ . . . . \ + . . X1-2:P2 X4-5:P1 . 
X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" ) # @@ -453,25 +529,26 @@ set_ctrl_state() PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} + case $CMD in + S*) PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + ;; + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +563,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > $TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +611,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . 
A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -565,18 +641,34 @@ dump_states() done } +# +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + # # Check effective cpus -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,41 +676,39 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } # # Check cgroup states -# $1 - check string, format: :[,:]* +# $1 - check string, format: :[|:]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" 
exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +732,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -665,22 +755,22 @@ check_cgroup_states() # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . ]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +779,21 @@ check_isolcpus() # to make appending those CPUs easier. 
# [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +827,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -773,6 +864,63 @@ null_isolcpus_check() exit 1 } +# +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? 
-ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + # # Run cpuset state transition test # $1 - test matrix name @@ -785,6 +933,8 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -812,10 +962,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -824,47 +975,79 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? -ne 0 ]] && test_fail $I "effective CPU" - } + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + echo "All $I tests of $TEST PASSED." +} - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . 
p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" - exit 1 + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." 
+ eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done + cd .. + rmdir rtest echo "All $I tests of $TEST PASSED." } @@ -932,6 +1115,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,10 +1181,13 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." 
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 137b2364a08207..fe3c728cd6be5a 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -89,14 +89,14 @@ FIXTURE_TEARDOWN(coredump) fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); } -TEST_F(coredump, stackdump) +TEST_F_TIMEOUT(coredump, stackdump, 120) { struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; char buf[PATH_MAX]; - int ret, i; + int ret, i, status; FILE *file; pid_t pid; @@ -129,6 +129,10 @@ TEST_F(coredump, stackdump) /* * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + for (i = 0; i < 10; ++i) { file = fopen(STACKDUMP_FILE, "r"); if (file) @@ -138,10 +142,12 @@ TEST_F(coredump, stackdump) ASSERT_NE(file, NULL); /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; for (i = 0; -1 != getline(&line, &line_length, file); ++i) { stack = strtoull(line, NULL, 10); ASSERT_NE(stack, 0); } + free(line); ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b46750..3aad9db921b533 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -244,9 +244,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 00000000000000..d16a65e7595d9c --- /dev/null +++ 
b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 8b7f6acad15f13..7c90a040ce45ad 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -6,7 +6,7 @@ from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip +from lib.py import defer, ethtool, ip, random def _get_hds_mode(cfg, netnl) -> str: @@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) @@ -243,6 +273,7 @@ def main() -> None: 
get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9f271ab6ec04e6..6a0378e06cabd2 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 2bf14ac2b8c624..9d48004ff1a178 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ 
-431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; +} + int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; @@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem) char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; int ret; ret = parse_address(server_ip, atoi(port), &server_sin); @@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem) sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); tmp_mem = malloc(mem->size); @@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem) goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -630,10 +635,8 @@ int do_server(struct memory_buffer *mem) void run_devmem_tests(void) { - struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; mem = provider->alloc(getpagesize() * NUM_PAGES); @@ -641,38 +644,24 @@ void run_devmem_tests(void) if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if 
(!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index e1ecb92f79d9b4..3370827409aa02 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -39,7 +39,7 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): port = rand_port() listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" - with bkg(listen_cmd, host=cfg.remote) as nc: + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) if ipver == "4": @@ -216,7 +216,7 @@ def main() -> None: ("", "6", "tx-tcp6-segmentation", None), ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 
4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("ipgre", False, "")), + ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), ) diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b066..8972f42dfe03ec 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,18 +266,14 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b682286606617..af8df2313a3b4b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,11 +9,11 @@ from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -21,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) @@ -57,7 +58,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: 
prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +67,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +77,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +94,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", 
shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +113,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,15 +134,25 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") -def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -202,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + 
ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd15c..59a71f22fb1180 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata, static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +69,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify) self->root_id = get_mnt_id(_metadata, "/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. 
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc index 6b94b678741a4c..f656bccb1a1490 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc @@ -7,11 +7,32 @@ MAX_ARGS=128 EXCEED_ARGS=$((MAX_ARGS + 1)) +# bash and dash evaluate variables differently. +# dash will evaluate '\\' every time it is read whereas bash does not. +# +# TEST_STRING="$TEST_STRING \\$i" +# echo $TEST_STRING +# +# With i=123 +# On bash, that will print "\123" +# but on dash, that will print the escape sequence of \123 as the \ will +# be interpreted again in the echo. +# +# Set a variable "bs" to save a double backslash, then echo that +# to "ts" to see if $ts changed or not. If it changed, it's dash, +# if not, it's bash, and then bs can equal a single backslash. 
+bs='\\' +ts=`echo $bs` +if [ "$ts" = '\\' ]; then + # this is bash + bs='\' +fi + check_max_args() { # event_header TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do - TEST_STRING="$TEST_STRING \\$i" + TEST_STRING="$TEST_STRING $bs$i" done echo "$TEST_STRING" >> dynamic_events echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84d8..c62165fabd0ce1 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc new file mode 100644 index 00000000000000..b6d6a312ead5ae --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ 
-d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +function_count() { + search=$1 + vsearch=$2 + + if [ -z "$search" ]; then + cat enabled_functions | wc -l + elif [ -z "$vsearch" ]; then + grep $search enabled_functions | wc -l + else + grep $search enabled_functions | grep $vsearch| wc -l + fi +} + +set_fgraph() { + instance=$1 + filter="$2" + notrace="$3" + + echo "$filter" > $instance/set_ftrace_filter + echo "$notrace" > $instance/set_ftrace_notrace + echo function_graph > $instance/current_tracer +} + +check_functions() { + orig_cnt=$1 + test=$2 + + cnt=`function_count $test` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +check_cnt() { + orig_cnt=$1 + search=$2 + vsearch=$3 + + cnt=`function_count $search $vsearch` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +reset_graph() { + instance=$1 + echo nop > $instance/current_tracer +} + +# get any functions that were enabled before the test +total_cnt=`function_count` +sched_cnt=`function_count sched` +lock_cnt=`function_count lock` +time_cnt=`function_count time` +clock_cnt=`function_count clock` +locks_clock_cnt=`function_count locks clock` +clock_locks_cnt=`function_count clock locks` + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# Make sure "time" isn't listed +check_functions $time_cnt 'time' +instance1_cnt=`function_count` + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' +instance1_2_cnt=`function_count` + +# Turn off the first instance +reset_graph $INSTANCE1 + +# The second instance doesn't trace "clock" functions +check_functions $clock_cnt 'clock' +instance2_cnt=`function_count` + +# 
Start from a clean slate +reset_graph $INSTANCE2 +check_functions $total_cnt + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' + +# This should match the last time instance 2 was by itself +cnt=`function_count` +if [ $instance2_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "clock" functions +check_functions $clock_cnt 'clock' + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# This should match the last time both instances were enabled +cnt=`function_count` +if [ $instance1_2_cnt -ne $cnt ]; then + fail +fi + +# Turn off the second instance +reset_graph $INSTANCE2 + +# This should match the last time instance 1 was by itself +cnt=`function_count` +if [ $instance1_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "time" functions +check_functions $time_cnt 'time' + +# Start from a clean slate +reset_graph $INSTANCE1 +check_functions $total_cnt + +# Enable all functions but those that have "locks" +set_fgraph $INSTANCE1 '' '*locks*' + +# Enable all functions but those that have "clock" +set_fgraph $INSTANCE2 '' '*clock*' + +# If a function has "locks" it should not have "clock" +check_cnt $locks_clock_cnt locks clock + +# If a function has "clock" it should not have "locks" +check_cnt $clock_locks_cnt clock locks + +reset_graph $INSTANCE1 +reset_graph $INSTANCE2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cdb75b..2d8230da906429 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { - 
ksft_test_result_pass("futex_waitv returned: %d %s\n", + ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 45b5570441ce81..b1f40857307da6 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f773f8f9924945..f62b0a5aba35a0 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test @@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += 
dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions @@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test 
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test TEST_GEN_PROGS_s390 += rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908c8..dc6559dad9d863 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 322b9d3b012559..57708de2075df7 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -129,10 +129,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), REG_FTR_END, }; diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd39..b0fc0f945766fe 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,67 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + 
+#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +163,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f35f..9d69904cb6084a 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & 
GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, 
*ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 
bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 279ad8946040cb..815bc45dd8dc68 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2019,9 +2019,8 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), }; /* diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab48..1375fca80bcdbe 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); 
exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. 
via cpuidle.off=1 or via -l , or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1b3..390ae2d874932b 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -35,11 +38,19 @@ do { \ testcase, vector); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? 
"cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,64 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); 
+ } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 
0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c929..18a6014920b5f8 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ static int audit_match_record(int audit_fd, const __u16 type, return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403de..cfc571afd0eb81 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +60,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, 
sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +75,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +92,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 allocated_dom = 2; @@ -107,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -115,7 +114,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,23 +141,143 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. 
*/ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. */ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. 
*/ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread . */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. 
*/ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -273,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records. */ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a87986e..73729382d40f82 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 81a1f64a22e860..377b3699ff3129 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,2 +1,3 @@ CONFIG_TEST_BITMAP=m +CONFIG_PRIME_NUMBERS=m CONFIG_TEST_BITOPS=m diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a614508..17ed3e9917ca17 100644 --- 
a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -261,9 +261,6 @@ TEST(check_file_mmap) TH_LOG("No read-ahead pages found in memory"); } - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); - } /* * End of the readahead window. The rest of the pages shouldn't * be in memory. @@ -286,8 +283,7 @@ TEST(check_file_mmap) /* * Test mincore() behavior on a page backed by a tmpfs file. This test - * performs the same steps as the previous one. However, we don't expect - * any readahead in this case. + * performs the same steps as the previous one. */ TEST(check_tmpfs_mmap) { @@ -298,7 +294,6 @@ TEST(check_tmpfs_mmap) int page_size; int fd; int i; - int ra_pages = 0; page_size = sysconf(_SC_PAGESIZE); vec_size = FILE_SIZE / page_size; @@ -341,8 +336,7 @@ TEST(check_tmpfs_mmap) } /* - * Touch a page in the middle of the mapping. We expect only - * that page to be fetched into memory. + * Touch a page in the middle of the mapping. 
*/ addr[FILE_SIZE / 2] = 1; retval = mincore(addr, FILE_SIZE, vec); @@ -351,15 +345,6 @@ TEST(check_tmpfs_mmap) TH_LOG("Page not found in memory after use"); } - i = FILE_SIZE / 2 / page_size + 1; - while (i < vec_size && vec[i]) { - ra_pages++; - i++; - } - ASSERT_EQ(ra_pages, 0) { - TH_LOG("Read-ahead pages found in memory"); - } - munmap(addr, FILE_SIZE); close(fd); free(vec); diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f03..e1fe16bcbbe880 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d31..9bc4591c7b1699 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. 
The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index f0cb14ea860842..b6cfe0a4b7dfd4 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b3d0e277109616..eba43ead13ae82 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions) self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); setup_sighandler(); - if (variant->backing == ANON_BACKED) + switch (variant->backing) { + case ANON_BACKED: return; - - self->fd = open_file( - variant->backing == SHMEM_BACKED ? "/tmp/" : "", - self->path); + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } /* We truncate file to at least 100 pages, tests can modify as needed. 
*/ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); @@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file) char *ptr; int i; - if (variant->backing == ANON_BACKED) + if (variant->backing != LOCAL_FILE_BACKED) SKIP(return, "Read-only test specific to file-backed"); /* Map shared so we can populate with pattern, populate it, unmap. */ diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc222b..0b0d4ba1af2771 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1bad310d282ad6..17bf2d1b0192e0 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,6 +3,8 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 @@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; 
nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c index ca4ad0d44ab2e9..255b332f7a08b2 100644 --- a/tools/testing/selftests/mm/pkey_util.c +++ b/tools/testing/selftests/mm/pkey_util.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ #include #include diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 679542f565a484..532bb732bc6dd3 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -39,6 +39,7 @@ scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6d718b478ed83f..70a38f485d4d1c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh @@ -106,6 +107,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh +TEST_PROGS += skf_net_off.sh +TEST_GEN_FILES += skf_net_off # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index b866bab1d92a1c..c7cea556b41694 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -359,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel 
masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -635,6 +652,23 @@ fib_rule4_test() "$getnomatch" "iif dscp masked redirect to table" \ "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea9015..e59fba366a0a67 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc 
filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. 
+ ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 00000000000000..8992aeabfe0b43 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). 
+h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. 
+ phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. 
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. 
+ ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? 
"Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. 
+test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? 
-ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a993..08c044ff6689d4 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -182,6 +185,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +216,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! 
[ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" @@ -247,3 +255,21 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 00000000000000..5b34f6e1f8314a --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. 
+# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). 
+# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local 
address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4f55477ffe0877..e7a75341f0f323 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,9 +206,8 @@ chk_dump_one() local token local msg - ss_token="$(ss -inmHMN $ns | grep 'token:' |\ - head -n 1 |\ - sed 's/.*token:\([0-9a-f]*\).*/\1/')" + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ awk -F':[ \t]+' '/^token/ {print $2}')" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 13a3b68181ee14..befa66f5a366bb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1441,6 +1441,15 @@ chk_join_nr() fi fi + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + 
elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then rc=${KSFT_SKIP} @@ -1450,6 +1459,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 47088b00539042..1f5979c1510c3c 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -387,6 +387,25 @@ race_repeat 0 perf_duration 0 " + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . 
ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() { nft flush ruleset } +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets)" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 00000000000000..1fdf61d6cd7fe9 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. 
+ * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + 
BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = &pi; + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = &eth; + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec 
iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 00000000000000..5da5066fb46538 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add 
fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9a85f93c33d86c..5ded3b3a7538a6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. 
+ */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + TEST_F(tls, rekey) { char const *test_str_1 = "test_message_before_rekey"; diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47afda..277f92f9d7537a 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b2f76a52215ad2..61acbd45ffaaf8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1629,14 +1629,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, { if (tracer) { int status; - /* - * Extract the exit code from the other process and - * adopt it for ourselves in case its asserts failed. - */ ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); - if (WEXITSTATUS(status)) - _metadata->exit_code = KSFT_FAIL; } } @@ -3166,12 +3160,15 @@ TEST(syscall_restart) ret = get_syscall(_metadata, child_pid); #if defined(__arm__) /* - * FIXME: * - native ARM registers do NOT expose true syscall. * - compat ARM registers on ARM64 DO expose true syscall. + * - values of utsbuf.machine include 'armv8l' or 'armb8b' + * for ARM64 running in compat mode. 
*/ ASSERT_EQ(0, uname(&utsbuf)); - if (strncmp(utsbuf.machine, "arm", 3) == 0) { + if ((strncmp(utsbuf.machine, "arm", 3) == 0) && + (strncmp(utsbuf.machine, "armv8l", 6) != 0) && + (strncmp(utsbuf.machine, "armv8b", 6) != 0)) { EXPECT_EQ(__NR_nanosleep, ret); } else #endif diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json index 1ba96c46775486..d9fc62ab476c9e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -412,5 +412,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY ingress" ] + }, + { + "id": "33f4", + "name": "Check echo of big filter command", + "category": [ + "infra", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel" + ], + "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'", + "verifyCmd": "", + "expExitCode": "0", + "matchCount": "0", + "matchPattern": "", + "teardown": [ + "$TC qdisc del dev $DUMMY parent root fq_codel" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 25454fd955371a..ddc97ecd8b3911 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -158,5 +158,447 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bb", + "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + 
"$TC qdisc add dev $DUMMY handle 1: root htb default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4be", + "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4bf", + "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "hfsc" + ], + "plugins": { + "requires": 
"nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c0", + "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c1", + "name": "Test FQ_CODEL with ETS parent - 
force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1", + "$TC class change dev $DUMMY parent 1: classid 1:1 ets", + "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c3", + "name": "Test HFSC with netem/blackhole - queue emptying during peek operation", + "category": [ + "qdisc", + "hfsc", + "netem", + "blackhole" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms", + "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true", + "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + 
"$TC class del dev $DUMMY parent 3:0 classid 3:1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true" + ], + "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "90ec", + "name": "Test DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "1f1f", + "name": "Test ETS's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:1 
handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s class show dev $DUMMY", + "matchJSON": [ + { + "class": "ets", + "handle": "1:1", + "stats": { + "bytes": 196, + "packets": 2 + } + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "5e6d", + "name": "Test QFQ's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root qfq", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "qfq", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "bf1d", + "name": "Test HFSC's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC 
filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", + "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "hfsc", + "handle": "1:", + "bytes": 392, + "packets": 4 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "7c3b", + "name": "Test nested DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr", + "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 
1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": 
["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json index e9469ee71e6fc7..6d515d0e5ed696 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json @@ -189,5 +189,29 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "deb1", + "name": "CODEL test qdisc limit trimming", + "category": ["qdisc", "codel"], + "plugins": { + "requires": ["nsPlugin", "scapyPlugin"] + }, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index 3a537b2ec4c972..24faf4e12dfa05 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -377,5 +377,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "9479", + "name": "FQ test qdisc limit trimming", + "category": ["qdisc", "fq"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1", 
+ "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json index 9774b1e8801bba..4ce62b857fd7ab 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json @@ -294,5 +294,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "0436", + "name": "FQ_CODEL test qdisc limit trimming", + "category": ["qdisc", "fq_codel"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index d012d88d67fee6..229fe1bf4a9062 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -18,5 +18,27 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "83bf", + "name": "FQ_PIE test qdisc limit trimming", + "category": ["qdisc", "fq_pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 
10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json index dbef5474b26bdc..0ca19fac54a57d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json @@ -188,5 +188,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "385f", + "name": "HHF test qdisc limit trimming", + "category": ["qdisc", "hhf"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json new file mode 100644 index 00000000000000..1a98b66e803071 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json @@ -0,0 +1,24 @@ +[ + { + "id": "6158", + "name": "PIE test qdisc limit trimming", + "category": ["qdisc", "pie"], + "plugins": 
{"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 50e8d72781cbd3..28c6ce6da7dbb8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -228,5 +228,41 @@ "matchCount": "0", "teardown": [ ] + }, + { + "id": "7f8f", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "5168", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore new file mode 100644 index 00000000000000..6d6165c5e35da6 --- /dev/null +++ b/tools/testing/selftests/tpm2/.gitignore 
@@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +AsyncTest.log +SpaceTest.log diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 168f4b166234af..3a60e6c6f5c949 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -6,6 +6,6 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip read tpm_version < /sys/class/tpm/tpm0/tpm_version_major -[ "$tpm_version" == 2 ] || exit $ksft_skip +[ "$tpm_version" = 2 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest 2>&1 diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index c7781efea0f33c..f34ac0bac69623 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -6,6 +6,10 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_04.sh +TEST_PROGS += test_generic_05.sh +TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -21,12 +25,16 @@ TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh +TEST_PROGS += test_stress_03.sh +TEST_PROGS += test_stress_04.sh +TEST_PROGS += test_stress_05.sh TEST_GEN_PROGS_EXTENDED = kublk include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ + fault_inject.c check: shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 00000000000000..94a8e729ba4c8f --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. 
Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. + */ + +#include "kublk.h" + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); + return 0; +} + +static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe; + struct __kernel_timespec ts = { + .tv_nsec = (long long)q->dev->private_data, + }; + + ublk_queue_alloc_sqes(q, &sqe, 1); + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1); + + ublk_queued_tgt_io(q, tag, 1); + + return 0; +} + +static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + if (cqe->res != -ETIME) + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, iod->nr_sectors << 9); + else + ublk_err("%s: io not complete after 1 cqe\n", __func__); +} + +static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "delay_us", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + 
ctx->fault_inject.delay_us = 0; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); + } + } +} + +static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tfault_inject: [--delay_us us (default 0)]\n"); +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .queue_io = ublk_fault_inject_queue_io, + .tgt_io_done = ublk_fault_inject_tgt_io_done, + .parse_cmd_line = ublk_fault_inject_cmd_line, + .usage = ublk_fault_inject_usage, +}; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 91c282bc767449..842b40736a9b81 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -5,22 +5,24 @@ #include "kublk.h" +#define MAX_NR_TGT_ARG 64 + unsigned int ublk_dbg_mask = UBLK_LOG; static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, &loop_tgt_ops, &stripe_tgt_ops, + &fault_inject_tgt_ops, }; static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) { - const struct ublk_tgt_ops *ops; int i; if (name == NULL) return NULL; - for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) if (strcmp(tgt_ops_list[i]->name, name) == 0) return tgt_ops_list[i]; return NULL; @@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, + .flags = CTRL_CMD_HAS_DATA, + }; + + 
dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + static int ublk_ctrl_add_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { @@ -207,10 +230,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev) }; } +static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) +{ + unsigned done = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, set)) + done += snprintf(&buf[done], len - done, "%d ", i); + } +} + +static void ublk_adjust_affinity(cpu_set_t *set) +{ + int j, updated = 0; + + /* + * Just keep the 1st CPU now. + * + * In future, auto affinity selection can be tried. + */ + for (j = 0; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, set)) { + if (!updated) { + updated = 1; + continue; + } + CPU_CLR(j, set); + } + } +} + +/* Caller must free the allocated buffer */ +static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, + }; + cpu_set_t *buf; + int i, ret; + + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); + if (!buf) + return -ENOMEM; + + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { + data.data[0] = i; + data.len = sizeof(cpu_set_t); + data.addr = (__u64)&buf[i]; + + ret = __ublk_ctrl_cmd(ctrl_dev, &data); + if (ret < 0) { + free(buf); + return ret; + } + ublk_adjust_affinity(&buf[i]); + } + + *ptr_buf = buf; + return 0; +} + static void ublk_ctrl_dump(struct ublk_dev *dev) { struct ublksrv_ctrl_dev_info *info = &dev->dev_info; struct ublk_params p; + cpu_set_t *affinity; int ret; ret = ublk_ctrl_get_params(dev, &p); @@ -219,12 +305,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) return; } + ret = ublk_ctrl_get_affinity(dev, &affinity); + if (ret < 0) { + ublk_err("failed to get affinity %m\n"); + return; + } + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d 
block size %d dev_capacity %lld\n", info->dev_id, info->nr_hw_queues, info->queue_depth, 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", info->max_io_buf_bytes, info->ublksrv_pid, info->flags, ublk_dev_state_desc(dev)); + + if (affinity) { + char buf[512]; + int i; + + for (i = 0; i < info->nr_hw_queues; i++) { + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); + printf("\tqueue %u: tid %d affinity(%s)\n", + i, dev->q[i].tid, buf); + } + free(affinity); + } + fflush(stdout); } @@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q) } ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, - IORING_SETUP_COOP_TASKRUN); + IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN); if (ret < 0) { ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", q->dev->dev_info.dev_id, q->q_id, ret); @@ -429,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -551,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will 
be completed immediately since no fetching @@ -602,9 +717,24 @@ static int ublk_process_io(struct ublk_queue *q) return reapped; } +static void ublk_queue_set_sched_affinity(const struct ublk_queue *q, + cpu_set_t *cpuset) +{ + if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) + ublk_err("ublk dev %u queue %u set affinity failed", + q->dev->dev_info.dev_id, q->q_id); +} + +struct ublk_queue_info { + struct ublk_queue *q; + sem_t *queue_sem; + cpu_set_t *affinity; +}; + static void *ublk_io_handler_fn(void *data) { - struct ublk_queue *q = data; + struct ublk_queue_info *info = data; + struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; int ret; @@ -614,6 +744,10 @@ static void *ublk_io_handler_fn(void *data) dev_id, q->q_id); return NULL; } + /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ + ublk_queue_set_sched_affinity(q, info->affinity); + sem_post(info->queue_sem); + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", q->tid, dev_id, q->q_id); @@ -639,7 +773,7 @@ static void ublk_set_parameters(struct ublk_dev *dev) dev->dev_info.dev_id, ret); } -static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) +static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) { uint64_t id; int evtfd = ctx->_evtfd; @@ -652,36 +786,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) else id = ERROR_EVTFD_DEVID; + if (dev && ctx->shadow_dev) + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; + close(evtfd); + shmdt(ctx->shadow_dev); + return 0; } static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { - int ret, i; - void *thread_ret; const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + struct ublk_queue_info *qinfo; + cpu_set_t *affinity_buf; + void *thread_ret; + sem_t queue_sem; + int ret, i; ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + 
qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info), + dinfo->nr_hw_queues); + if (!qinfo) + return -ENOMEM; + + sem_init(&queue_sem, 0, 0); ret = ublk_dev_prep(ctx, dev); if (ret) return ret; + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); + if (ret) + return ret; + for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; dev->q[i].q_id = i; + + qinfo[i].q = &dev->q[i]; + qinfo[i].queue_sem = &queue_sem; + qinfo[i].affinity = &affinity_buf[i]; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, - &dev->q[i]); + &qinfo[i]); } + for (i = 0; i < dinfo->nr_hw_queues; i++) + sem_wait(&queue_sem); + free(qinfo); + free(affinity_buf); + /* everything is fine now, start us */ - ublk_set_parameters(dev); - ret = ublk_ctrl_start_dev(dev, getpid()); + if (ctx->recovery) + ret = ublk_ctrl_end_user_recovery(dev, getpid()); + else { + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + } if (ret < 0) { ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); goto fail; @@ -691,7 +857,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->fg) ublk_ctrl_dump(dev); else - ublk_send_dev_event(ctx, dev->dev_info.dev_id); + ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); /* wait until we are terminated */ for (i = 0; i < dinfo->nr_hw_queues; i++) @@ -856,7 +1022,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) } } - ret = ublk_ctrl_add_dev(dev); + if (ctx->recovery) + ret = ublk_ctrl_start_user_recovery(dev); + else + ret = ublk_ctrl_add_dev(dev); if (ret < 0) { ublk_err("%s: can't add dev id %d, type %s ret %d\n", __func__, dev_id, tgt_type, ret); @@ -870,7 +1039,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) fail: if (ret < 0) - ublk_send_dev_event(ctx, -1); + ublk_send_dev_event(ctx, dev, -1); ublk_ctrl_deinit(dev); return ret; } @@ -884,30 +1053,58 @@ static int cmd_dev_add(struct dev_ctx *ctx) if (ctx->fg) goto run; + ctx->_shmid = shmget(IPC_PRIVATE, 
sizeof(struct ublk_dev), IPC_CREAT | 0666); + if (ctx->_shmid < 0) { + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); + if (ctx->shadow_dev == (struct ublk_dev *)-1) { + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); + exit(-1); + } ctx->_evtfd = eventfd(0, 0); if (ctx->_evtfd < 0) { ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); exit(-1); } - setsid(); res = fork(); if (res == 0) { + int res2; + + setsid(); + res2 = fork(); + if (res2 == 0) { + /* prepare for detaching */ + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); run: - res = __cmd_dev_add(ctx); - return res; + res = __cmd_dev_add(ctx); + return res; + } else { + /* detached from the foreground task */ + exit(EXIT_SUCCESS); + } } else if (res > 0) { uint64_t id; + int exit_code = EXIT_FAILURE; res = read(ctx->_evtfd, &id, sizeof(id)); close(ctx->_evtfd); if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { ctx->dev_id = id - 1; - return __cmd_dev_list(ctx); + if (__cmd_dev_list(ctx) >= 0) + exit_code = EXIT_SUCCESS; } - exit(EXIT_FAILURE); + shmdt(ctx->shadow_dev); + shmctl(ctx->_shmid, IPC_RMID, NULL); + /* wait for child and detach from it */ + wait(NULL); + exit(exit_code); } else { - return res; + exit(EXIT_FAILURE); } } @@ -969,6 +1166,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx) ublk_err("%s: can't get dev info from %d: %d\n", __func__, ctx->dev_id, ret); } else { + if (ctx->shadow_dev) + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); + ublk_ctrl_dump(dev); } @@ -1039,14 +1239,47 @@ static int cmd_dev_get_features(void) return ret; } +static void __cmd_create_help(char *exe, bool recovery) +{ + int i; + + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", + exe, recovery ? 
"recover" : "add"); + printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); + printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[target options] [backfile1] [backfile2] ...\n"); + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + + for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; + + if (ops->usage) + ops->usage(ops); + } +} + +static void cmd_add_help(char *exe) +{ + __cmd_create_help(exe, false); + printf("\n"); +} + +static void cmd_recover_help(char *exe) +{ + __cmd_create_help(exe, true); + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); + printf("\n"); +} + static int cmd_dev_help(char *exe) { - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + cmd_add_help(exe); + cmd_recover_help(exe); + printf("%s del [-n dev_id] -a \n", exe); - printf("\t -a delete all devices -n delete specified device\n"); + printf("\t -a delete all devices -n delete specified device\n\n"); printf("%s list [-n dev_id] -a \n", exe); - printf("\t -a list all devices, -n list specified device, default -a \n"); + printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", exe); return 0; } @@ -1063,9 +1296,13 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, - { "chunk_size", 1, NULL, 0 }, + { "recovery", 1, NULL, 'r' }, + { "recovery_fail_io", 1, NULL, 'e'}, + { "recovery_reissue", 1, NULL, 'i'}, + { "get_data", 1, NULL, 'g'}, { 0, 0, 0, 0 } }; + const struct ublk_tgt_ops *ops = NULL; int option_idx, opt; const char *cmd = argv[1]; struct dev_ctx ctx = { @@ -1073,15 +1310,18 @@ int main(int argc, char *argv[]) .nr_hw_queues = 2, .dev_id = -1, 
.tgt_type = "unknown", - .chunk_size = 65536, /* def chunk size is 64K */ }; int ret = -EINVAL, i; + int tgt_argc = 1; + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; + int value; if (argc == 1) return ret; + opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1103,6 +1343,24 @@ int main(int argc, char *argv[]) case 'z': ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; break; + case 'r': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY; + break; + case 'e': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; + break; + case 'i': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; + break; + case 'g': + ctx.flags |= UBLK_F_NEED_GET_DATA; + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1110,8 +1368,26 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; - if (!strcmp(longopts[option_idx].name, "chunk_size")) - ctx.chunk_size = strtol(optarg, NULL, 10); + break; + case '?': + /* + * target requires every option must have argument + */ + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { + fprintf(stderr, "every target option requires argument: %s %s\n", + argv[optind - 1], argv[optind]); + exit(EXIT_FAILURE); + } + + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { + tgt_argv[tgt_argc++] = argv[optind - 1]; + tgt_argv[tgt_argc++] = argv[optind]; + } else { + fprintf(stderr, "too many target options\n"); + exit(EXIT_FAILURE); + } + optind += 1; + break; } } @@ -1120,9 +1396,25 @@ int main(int argc, char *argv[]) ctx.files[ctx.nr_files++] = argv[i++]; } + ops = ublk_find_tgt(ctx.tgt_type); + if (ops && 
ops->parse_cmd_line) { + optind = 0; + + tgt_argv[0] = ctx.tgt_type; + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); + } + if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); - else if (!strcmp(cmd, "del")) + else if (!strcmp(cmd, "recover")) { + if (ctx.dev_id < 0) { + fprintf(stderr, "device id isn't provided for recovering\n"); + ret = -EINVAL; + } else { + ctx.recovery = 1; + ret = cmd_dev_add(&ctx); + } + } else if (!strcmp(cmd, "del")) ret = cmd_dev_del(&ctx); else if (!strcmp(cmd, "list")) { ctx.all = 1; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 760ff8ffb81070..44ee1e4ac55b2d 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -20,9 +20,15 @@ #include #include #include +#include +#include +#include #include -#include +#include + +/* allow ublk_dep.h to override ublk_cmd.h */ #include "ublk_dep.h" +#include #define __maybe_unused __attribute__((unused)) #define MAX_BACK_FILES 4 @@ -30,6 +36,8 @@ #define min(a, b) ((a) < (b) ? 
(a) : (b)) #endif +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + /****************** part 1: libublk ********************/ #define CTRL_DEV "/dev/ublk-control" @@ -42,8 +50,8 @@ #define UBLKSRV_IO_IDLE_SECS 20 #define UBLK_IO_MAX_BYTES (1 << 20) -#define UBLK_MAX_QUEUES 4 -#define UBLK_QUEUE_DEPTH 128 +#define UBLK_MAX_QUEUES 32 +#define UBLK_QUEUE_DEPTH 1024 #define UBLK_DBG_DEV (1U << 0) #define UBLK_DBG_QUEUE (1U << 1) @@ -55,6 +63,16 @@ struct ublk_dev; struct ublk_queue; +struct stripe_ctx { + /* stripe */ + unsigned int chunk_size; +}; + +struct fault_inject_ctx { + /* fault_inject */ + unsigned long delay_us; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -66,11 +84,18 @@ struct dev_ctx { unsigned int logging:1; unsigned int all:1; unsigned int fg:1; - - /* stripe */ - unsigned int chunk_size; + unsigned int recovery:1; int _evtfd; + int _shmid; + + /* built from shmem, only for ublk_dump_dev() */ + struct ublk_dev *shadow_dev; + + union { + struct stripe_ctx stripe; + struct fault_inject_ctx fault_inject; + }; }; struct ublk_ctrl_cmd_data { @@ -90,6 +115,7 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -107,6 +133,14 @@ struct ublk_tgt_ops { int (*queue_io)(struct ublk_queue *, int tag); void (*tgt_io_done)(struct ublk_queue *, int tag, const struct io_uring_cqe *); + + /* + * Target specific command line handling + * + * each option requires argument for target command line + */ + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); + void (*usage)(const struct ublk_tgt_ops *ops); }; struct ublk_tgt { @@ -357,6 +391,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct 
ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 179731c3dd6fec..5dbd6392d83de2 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, }, }; - unsigned chunk_size = ctx->chunk_size; + unsigned chunk_size = ctx->stripe.chunk_size; struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; @@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) backing_file_tgt_deinit(dev); } +static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->stripe.chunk_size = 65536; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "chunk_size")) + ctx->stripe.chunk_size = strtol(optarg, NULL, 10); + } + } +} + +static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); +} + const struct ublk_tgt_ops stripe_tgt_ops = { .name = "stripe", .init_tgt = ublk_stripe_tgt_init, .deinit_tgt = ublk_stripe_tgt_deinit, .queue_io = ublk_stripe_queue_io, .tgt_io_done = ublk_stripe_io_done, + .parse_cmd_line = ublk_stripe_cmd_line, + .usage = ublk_stripe_usage, }; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a88b359432279b..a81210ca3e99d2 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,8 +17,8 @@ 
_get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } @@ -30,18 +30,26 @@ _run_fio_verify_io() { } _create_backfile() { - local my_size=$1 - local my_file + local index=$1 + local new_size=$2 + local old_file + local new_file - my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) - truncate -s "${my_size}" "${my_file}" - echo "$my_file" + old_file="${UBLK_BACKFILES[$index]}" + [ -f "$old_file" ] && rm -f "$old_file" + + new_file=$(mktemp ublk_file_"${new_size}"_XXXXX) + truncate -s "${new_size}" "${new_file}" + UBLK_BACKFILES["$index"]="$new_file" } -_remove_backfile() { - local file=$1 +_remove_files() { + local file - [ -f "$file" ] && rm -f "$file" + for file in "${UBLK_BACKFILES[@]}"; do + [ -f "$file" ] && rm -f "$file" + done + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" } _create_tmp_dir() { @@ -106,6 +114,7 @@ _prep_test() { local type=$1 shift 1 modprobe ublk_drv > /dev/null 2>&1 + UBLK_TMP=$(mktemp ublk_test_XXXXX) [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" } @@ -129,7 +138,10 @@ _show_result() echo "$1 : [FAIL]" fi fi - [ "$2" -ne 0 ] && exit "$2" + if [ "$2" -ne 0 ]; then + _remove_files + exit "$2" + fi return 0 } @@ -138,16 +150,16 @@ _check_add_dev() { local tid=$1 local code=$2 - shift 2 + if [ "${code}" -ne 0 ]; then - _remove_test_files "$@" _show_result "${tid}" "${code}" fi } _cleanup_test() { "${UBLK_PROG}" del -a - rm -f "$UBLK_TMP" + + _remove_files } _have_feature() @@ -158,9 +170,11 @@ _have_feature() return 1 } -_add_ublk_dev() { - local kublk_temp; +_create_ublk_dev() { local dev_id; + local cmd=$1 + + shift 1 if [ ! -c /dev/ublk-control ]; then return ${UBLK_SKIP_CODE} @@ -171,17 +185,34 @@ _add_ublk_dev() { fi fi - kublk_temp=$(mktemp /tmp/kublk-XXXXXX) - if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then + if ! 
dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" - rm -f "${kublk_temp}" return 255 fi - - dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') udevadm settle - rm -f "${kublk_temp}" - echo "${dev_id}" + + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then + echo "${dev_id}" + else + return 255 + fi +} + +_add_ublk_dev() { + _create_ublk_dev "add" "$@" +} + +_recover_ublk_dev() { + local dev_id + local state + + dev_id=$(_create_ublk_dev "recover" "$@") + for ((j=0;j<20;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "LIVE" ] && break + sleep 1 + done + echo "$state" } # kill the ublk daemon and return ublk device state @@ -220,7 +251,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -238,15 +269,80 @@ __run_io_and_remove() wait } +run_io_and_remove() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" + exit 255 + fi +} + +run_io_and_kill_daemon() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" + exit 255 + fi +} + +run_io_and_recover() +{ + local state + local dev_id + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? 
+ + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 4 + + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after killing daemon" + return 255 + fi + + state=$(_recover_ublk_dev -n "$dev_id" "$@") + if [ "$state" != "LIVE" ]; then + echo "faile to recover to LIVE($state)" + return 255 + fi + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd } -UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 +UBLK_BACKFILES=() export UBLK_PROG export UBLK_TEST_QUIET export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh new file mode 100755 index 00000000000000..8a3bc080c5771e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 & +ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh new file mode 100755 index 00000000000000..3bb00a34740201 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_05" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! 
_have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -z & +ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh new file mode 100755 index 00000000000000..b67230c42c847c --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_06" +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? + +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" 
+ ERR_CODE=255 +fi + +_cleanup_test "fault_inject" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 00000000000000..cba86451fa5e57 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index 1ef8b6044777cb..833fa0dbc7009d 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? 
_cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh index 03863d825e07da..874568b3646b62 100755 --- a/tools/testing/selftests/ublk/test_loop_02.sh +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -8,15 +8,13 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index e9ca744de8b134..c30f797c642955 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -12,9 +12,9 @@ fi _prep_test "loop" "write and verify over zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -22,6 +22,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh index 1435422c38ec8c..b01d75b3214d21 100755 --- a/tools/testing/selftests/ublk/test_loop_04.sh +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -8,15 +8,14 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount with zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? 
"$backfile_0" +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh index 2e6e2e6978fca5..de21415330742e 100755 --- a/tools/testing/selftests/ublk/test_loop_05.sh +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index a8be24532b24b7..7d3150f057d440 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -4,44 +4,31 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_01" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_remove() { - local size=$1 - shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi - DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then - echo "/dev/ublkc${DEV_ID} isn't removed" - _remove_backfile "${backfile}" - exit 255 + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_remove 8G -t null -q 4 & +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" -ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2159e4cc8140ec..1a9065125ae1b2 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -4,44 +4,31 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_02" ERR_CODE=0 -DEV_ID=-1 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi ublk_io_and_kill_daemon() { - local size=$1 - shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi - DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then - echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" - _remove_backfile "${backfile}" - exit 255 + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null -q 4 -ERR_CODE=$? 
-if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_kill_daemon 8G -t null -q 4 & +ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" -ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh new file mode 100755 index 00000000000000..e0854f71d35b9d --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_03" +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! 
_have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -z & +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh new file mode 100755 index 00000000000000..1798a98387e887 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_04" +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh new file mode 100755 index 00000000000000..88601b48f1cd37 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. 
"$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_05" +ERR_CODE=0 + +run_io_and_remove() +{ + local size=$1 + local dev_id + local dev_pid + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev $TID $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \ + --runtime=40 --time_based > /dev/null 2>&1 & + sleep 4 + + dev_pid=$(_get_ublk_daemon_pid "$dev_id") + kill -9 "$dev_pid" + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi +} + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and remove device with recovery enabled" + +_create_backfile 0 256M +_create_backfile 1 256M + +for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + wait +done + +if _have_feature "ZERO_COPY"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + wait + done +fi + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index 7e387ef656ea89..4e4f0fdf3c9b5b 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? 
"${backfile_0}" +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh index e8a45fa82dde04..5820ab2efba474 100755 --- a/tools/testing/selftests/ublk/test_stripe_02.sh +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh index c1b34af36145a8..20b977e27814c4 100755 --- a/tools/testing/selftests/ublk/test_stripe_03.sh +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? 
_cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 00000000000000..1b51ed2f1d843e --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_04" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 00000000000000..8ff2d7226c7f3f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 00000000000000..2f9664a80617a6 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. 
+ +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + 
ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack(' 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = 
c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> 
' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' ' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 00000000000000..3204f4728c62cc --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. 
+ +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" 
+ log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 00000000000000..f40078d9f6ffc1 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. 
+# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = 
len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + 
ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 00000000000000..7bca81f2f6065b --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. 
+ +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, 
mitigation): + check_mitigation() + +ksft.finished() diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 66dbb362385f3c..0f97fb0d19e19c 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) { if (kmalloc_verbose) - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) @@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, size_t i; if (kmalloc_verbose) - pr_debug("Bulk alloc %lu\n", size); + pr_debug("Bulk alloc %zu\n", size); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h new file mode 100644 index 00000000000000..ea3081426ee95b --- /dev/null +++ b/tools/testing/shared/linux/cleanup.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/cleanup.h" diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d0f6d253ac72d0..613551132a9663 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. Repeat SIOCOUTQ until it returns 0. 
+ */ + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + break; + } perror("ioctl"); exit(EXIT_FAILURE); } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } - + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); close(fd); } diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c index 5abee884037aeb..0bc44ce5d69bd9 100644 --- a/tools/tracing/rtla/src/timerlat_bpf.c +++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef HAVE_BPF_SKEL +#define _GNU_SOURCE #include "timerlat.h" #include "timerlat_bpf.h" #include "timerlat.skel.h" diff --git a/usr/include/Makefile b/usr/include/Makefile index e3d6b03527fecc..f02f41941b60c8 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -59,6 +59,10 @@ ifeq ($(SRCARCH),arc) no-header-test += linux/bpf_perf_event.h endif +ifeq ($(SRCARCH),openrisc) +no-header-test += linux/bpf_perf_event.h +endif + ifeq ($(SRCARCH),powerpc) no-header-test += linux/bpf_perf_event.h endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 746e1f466aa647..727b542074e7ef 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -75,7 +75,7 @@ config KVM_COMPAT depends on KVM && COMPAT && !(S390 || ARM64 || RISCV) config HAVE_KVM_IRQ_BYPASS - bool + tristate select IRQ_BYPASS_MANAGER config HAVE_KVM_VCPU_ASYNC_IOCTL diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 249ba5b72e9b09..11e5d1e3f12eae 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -149,7 +149,7 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) 
irq_bypass_unregister_consumer(&irqfd->consumer); #endif eventfd_ctx_put(irqfd->eventfd); @@ -274,7 +274,7 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) void __attribute__((weak)) kvm_arch_irq_bypass_stop( struct irq_bypass_consumer *cons) { @@ -424,7 +424,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (events & EPOLLIN) schedule_work(&irqfd->inject); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (kvm_arch_has_irq_bypass()) { irqfd->consumer.token = (void *)irqfd->eventfd; irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; @@ -609,14 +609,14 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); list_for_each_entry(irqfd, &kvm->irqfds.items, list) { -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) /* Under irqfds.lock, so can read irq_entry safely */ struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry; #endif irqfd_update(kvm, irqfd); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) if (irqfd->producer && kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) { int ret = kvm_arch_update_irqfd_routing(