diff --git a/gcc/config.gcc b/gcc/config.gcc index c678b801f705..2c568d05077c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -559,7 +559,7 @@ riscv*) extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o" extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o" extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o sifive-vector-builtins-bases.o andes-vector-builtins-bases.o" - extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o" + extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o arcv.o" d_target_objs="riscv-d.o" extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h riscv_th_vector.h sifive_vector.h andes_vector.h" target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc" diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md new file mode 100644 index 000000000000..7cbabac29a58 --- /dev/null +++ b/gcc/config/riscv/arcv-rhx100.md @@ -0,0 +1,106 @@ +;; DFA scheduling description of the Synopsys RHX-100 cpu +;; for GNU C compiler +;; Copyright (C) 2025 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . 
+ +(define_automaton "arcv_rhx100") + +(define_cpu_unit "arcv_rhx100_ALU_A_fuse0_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_A_fuse1_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_B_fuse0_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_ALU_B_fuse1_early" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_MPY32" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DIV" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DMP_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DMP_fuse1" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_fdivsqrt" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueA_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueA_fuse1" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueB_fuse0" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_issueB_fuse1" "arcv_rhx100") + +;; Instruction reservation for arithmetic instructions (pipe A, pipe B). +(define_insn_reservation "arcv_rhx100_alu_early_arith" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "unknown,move,const,arith,shift,slt,multi,auipc,nop,logical,\ + bitmanip,min,max,minu,maxu,clz,ctz,atomic,\ + condmove,mvpair,zicond,cpop,clmul")) + "((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))") + +(define_insn_reservation "arcv_rhx100_imul_fused" 4 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "imul_fused")) + "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early + arcv_rhx100_MPY32), nothing*3") + +(define_insn_reservation "arcv_rhx100_alu_fused" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "alu_fused")) + "(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early) | (arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + 
arcv_rhx100_ALU_B_fuse0_early + arcv_rhx100_ALU_B_fuse1_early)") + +(define_insn_reservation "arcv_rhx100_jmp_insn" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "branch,jump,call,jalr,ret,trap")) + "arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1") + +(define_insn_reservation "arcv_rhx100_div_insn" 12 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "idiv")) + "arcv_rhx100_issueA_fuse0 + arcv_rhx100_DIV, nothing*11") + +(define_insn_reservation "arcv_rhx100_mpy32_insn" 4 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "imul")) + "arcv_rhx100_issueA_fuse0 + arcv_rhx100_MPY32, nothing*3") + +(define_insn_reservation "arcv_rhx100_load_insn" 3 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "load,fpload")) + "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") + +(define_insn_reservation "arcv_rhx100_store_insn" 1 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "store,fpstore")) + "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") + +;; (soft) floating points +(define_insn_reservation "arcv_rhx100_xfer" 3 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp")) + "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early), nothing*2") + +(define_insn_reservation "arcv_rhx100_fmul" 5 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fadd,fmul,fmadd")) + "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early)") + +(define_insn_reservation "arcv_rhx100_fdiv" 20 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fdiv,fsqrt")) + "arcv_rhx100_fdivsqrt*20") + +;; Bypasses +(define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") + +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_alu_early_arith") 
+(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_mpy*_insn") +(define_bypass 2 "arcv_rhx100_load_insn" "arcv_rhx100_load_insn") +(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_div_insn") + +(define_bypass 3 "arcv_rhx100_mpy32_insn" "arcv_rhx100_mpy*_insn") +(define_bypass 3 "arcv_rhx100_mpy32_insn" "arcv_rhx100_div_insn") diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md new file mode 100644 index 000000000000..29dde32a40c2 --- /dev/null +++ b/gcc/config/riscv/arcv-rmx100.md @@ -0,0 +1,103 @@ +;; DFA scheduling description of the Synopsys RMX-100 cpu +;; for GNU C compiler +;; Copyright (C) 2025 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_automaton "arcv_rmx100") + +(define_cpu_unit "arcv_rmx100_ALU" "arcv_rmx100") +;(define_cpu_unit "arcv_rmx100_CSR" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_FPU" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_MPY" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DIV" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DMP" "arcv_rmx100") + +;; Instruction reservation for arithmetic instructions. 
+(define_insn_reservation "arcv_rmx100_alu_arith" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "unknown, const, arith, shift, slt, multi, auipc, nop, + logical, move, atomic, mvpair, bitmanip, clz, ctz, cpop, + zicond, condmove, clmul, min, max, minu, maxu, rotate")) + "arcv_rmx100_ALU") + +(define_insn_reservation "arcv_rmx100_jmp_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "branch, jump, call, jalr, ret, trap")) + "arcv_rmx100_ALU") + +; DIV insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_div_insn" 35 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "idiv")) + "arcv_rmx100_DIV*35") + +; MPY insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_mpy32_insn" 9 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "imul")) + "arcv_rmx100_MPY") + +(define_insn_reservation "arcv_rmx100_load_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "load")) + "arcv_rmx100_DMP,nothing*2") + +(define_insn_reservation "arcv_rmx100_store_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "store,fpstore")) + "arcv_rmx100_DMP") + +;; FPU scheduling. FIXME: This is based on the "fast" unit for now, the "slow" +;; option remains to be implemented later (together with the -mfpu flag). 
+ +(define_insn_reservation "arcv_rmx100_fpload_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fpload")) + "arcv_rmx100_DMP,nothing*2") + +(define_insn_reservation "arcv_rmx100_farith_insn" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fadd,fcmp")) + "arcv_rmx100_FPU,nothing") + +(define_insn_reservation "arcv_rmx100_xfer" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fmul_insn" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fmul")) + "arcv_rmx100_FPU,nothing") + +(define_insn_reservation "arcv_rmx100_fmac_insn" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fmadd")) + "arcv_rmx100_FPU,nothing") + +(define_insn_reservation "arcv_rmx100_fdiv_insn" 10 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fdiv,fsqrt")) + "arcv_rmx100_FPU") + + +(define_bypass 1 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 2 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") + +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc new file mode 100644 index 000000000000..f0a39a23cb77 --- /dev/null +++ b/gcc/config/riscv/arcv.cc @@ -0,0 +1,882 @@ +/* Subroutines used for code generation for Synopsys ARC-V processors. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "tm.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "stor-layout.h" +#include "alias.h" +#include "fold-const.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "explow.h" +#include "calls.h" +#include "varasm.h" +#include "expr.h" +#include "tm_p.h" +#include "df.h" +#include "reload.h" +#include "sched-int.h" +#include "tm-constrs.h" +#include "arcv.h" + +/* Scheduler state tracking for dual-pipe ARCV architectures. */ + +struct arcv_sched_state { + /* True if the ALU pipe has been scheduled for the current cycle. + The ALU pipe handles arithmetic, logical, and other computational + instructions. */ + int alu_pipe_scheduled_p; + + /* True if pipe B has been scheduled for the current cycle. + Pipe B is the second execution pipe, typically used for memory + operations (loads/stores) but can also handle other instructions. */ + int pipeB_scheduled_p; + + /* The last instruction that was scheduled. Used to detect fusion + opportunities by looking ahead at the next instruction to be + scheduled. */ + rtx_insn *last_scheduled_insn; + + /* Cached value of how many more instructions can be issued in the + current cycle. Updated as instructions are scheduled and pipes + become occupied. */ + short cached_can_issue_more; +}; + +static struct arcv_sched_state sched_state; + +/* Implement one boolean function for each of the values of the + arcv_mpy_option enum, for the needs of arcv-rmx100.md. 
*/ + +bool +arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_1C; +} + +bool +arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_2C; +} + +bool +arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_10C; +} + +/* Return TRUE if the target microarchitecture supports macro-op + fusion for two memory operations of mode MODE (the direction + of transfer is determined by the IS_LOAD parameter). */ + +static bool +arcv_pair_fusion_mode_allowed_p (machine_mode mode, bool is_load) +{ + if (!TARGET_ARCV_RHX100) + return true; + + return ((is_load && (mode == SImode + || mode == HImode + || mode == QImode)) + || (!is_load && mode == SImode)); +} + +/* Return TRUE if two addresses can be fused. */ + +static bool +arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load) +{ + rtx base0, base1, tmp; + HOST_WIDE_INT off0 = 0, off1 = 0; + + if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND) + addr0 = XEXP (addr0, 0); + + if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND) + addr1 = XEXP (addr1, 0); + + if (!MEM_P (addr0) || !MEM_P (addr1)) + return false; + + /* Require the accesses to have the same mode. */ + if (GET_MODE (addr0) != GET_MODE (addr1)) + return false; + + /* Check if the mode is allowed. 
*/ + if (!arcv_pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load)) + return false; + + rtx reg0 = XEXP (addr0, 0); + rtx reg1 = XEXP (addr1, 0); + + if (GET_CODE (reg0) == PLUS) + { + base0 = XEXP (reg0, 0); + tmp = XEXP (reg0, 1); + if (!CONST_INT_P (tmp)) + return false; + off0 = INTVAL (tmp); + } + else if (REG_P (reg0)) + base0 = reg0; + else + return false; + + if (GET_CODE (reg1) == PLUS) + { + base1 = XEXP (reg1, 0); + tmp = XEXP (reg1, 1); + if (!CONST_INT_P (tmp)) + return false; + off1 = INTVAL (tmp); + } + else if (REG_P (reg1)) + base1 = reg1; + else + return false; + + /* Check if we have the same base. */ + gcc_assert (REG_P (base0) && REG_P (base1)); + if (REGNO (base0) != REGNO (base1)) + return false; + + /* Fuse adjacent aligned addresses. */ + if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0) + && (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ())) + return true; + + return false; +} + +/* Helper function to check if instruction type is arithmetic-like. */ + +static bool +arcv_arith_type_insn_p (rtx_insn *insn) +{ + enum attr_type type = get_attr_type (insn); + + return (type == TYPE_ARITH + || type == TYPE_LOGICAL + || type == TYPE_SHIFT + || type == TYPE_SLT + || type == TYPE_BITMANIP + || type == TYPE_MIN + || type == TYPE_MAX + || type == TYPE_MINU + || type == TYPE_MAXU + || type == TYPE_CLZ + || type == TYPE_CTZ); +} + +/* Helper to check if curr's source operand is valid for fusion. */ + +static bool +arcv_arith_src_p (rtx curr_set) +{ + rtx src = SET_SRC (curr_set); + + /* Immediate operand or register operand. */ + return CONST_INT_P (src) || REG_P (XEXP (src, 0)); +} + +/* Helper to check if curr operation is compatible with load's destination. 
*/ + +static bool +arcv_load_arith_pair_p (rtx prev_set, rtx curr_set) +{ + rtx load_addr = XEXP (SET_SRC (prev_set), 0); + rtx load_dest = SET_DEST (prev_set); + rtx arith_src = XEXP (SET_SRC (curr_set), 0); + rtx arith_dest = SET_DEST (curr_set); + + /* Address register must be a register. */ + if (!REG_P (load_addr)) + return false; + + /* Address register must match first source operand of arithmetic op. */ + if (REGNO (load_addr) != REGNO (arith_src)) + return false; + + /* Address register must not be the load destination (no clobber). */ + if (REGNO (load_addr) == REGNO (load_dest)) + return false; + + /* Load and arithmetic destinations must be different. */ + if (REGNO (load_dest) == REGNO (arith_dest)) + return false; + + /* Check operand constraints for different arithmetic formats. */ + rtx src = SET_SRC (curr_set); + + /* Unary operation: (set (reg:X rd1) (not (reg:X rs1))). */ + if (GET_RTX_LENGTH (GET_CODE (src)) == 1) + return true; + + /* Immediate operation: (set (reg:X rd2) (op (reg:X rs1) (const_int))). */ + if (CONST_INT_P (XEXP (src, 1))) + return true; + + /* Binary register operation: ensure load dest != second source register. */ + if (REGNO (load_dest) != REGNO (XEXP (src, 1))) + return true; + + return false; +} + +/* Helper to check if curr operation is compatible with store's address. */ + +static bool +arcv_store_arith_pair_p (rtx prev_set, rtx curr_set) +{ + rtx store_addr = XEXP (SET_DEST (prev_set), 0); + rtx arith_src = XEXP (SET_SRC (curr_set), 0); + + /* Address register must be a register. */ + if (!REG_P (store_addr)) + return false; + + /* Address register must match first source operand of arithmetic op. */ + if (REGNO (store_addr) != REGNO (arith_src)) + return false; + + /* Check operand constraints for different arithmetic formats. */ + rtx src = SET_SRC (curr_set); + + /* Unary operation. */ + if (GET_RTX_LENGTH (GET_CODE (src)) == 1) + return true; + + /* Immediate operation. 
*/ + if (CONST_INT_P (XEXP (src, 1))) + return true; + + /* Binary register operation: store addr == second source is OK. */ + if (REGNO (store_addr) == REGNO (XEXP (src, 1))) + return true; + + return false; +} + +/* Return true if PREV and CURR constitute an ordered load/store + op/opimm + pair, for the purposes of ARCV-specific macro-op fusion. */ +static bool +arcv_memop_arith_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Check if curr is an arithmetic-type instruction. */ + if (!arcv_arith_type_insn_p (curr)) + return false; + + /* Check if curr has valid source operands. */ + if (!arcv_arith_src_p (curr_set)) + return false; + + /* Check for load + arithmetic fusion. */ + if (get_attr_type (prev) == TYPE_LOAD) + return arcv_load_arith_pair_p (prev_set, curr_set); + + /* Check for store + arithmetic fusion. */ + if (get_attr_type (prev) == TYPE_STORE) + return arcv_store_arith_pair_p (prev_set, curr_set); + + return false; +} + + +/* Return true if PREV and CURR constitute an ordered load/store + lui pair, for + the purposes of ARCV-specific macro-op fusion. */ + +static bool +arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + gcc_assert (prev_set); + gcc_assert (curr_set); + + /* Check if curr is a LUI instruction: + - LUI via HIGH: (set (reg:X rd) (high (const_int))) + - LUI via immediate: (set (reg:X rd) (const_int UPPER_IMM_20)) */ + bool is_lui = (REG_P (curr) + && ((get_attr_type (curr) == TYPE_MOVE + && GET_CODE (SET_SRC (curr_set)) == HIGH) + || (CONST_INT_P (SET_SRC (curr_set)) + && LUI_OPERAND (INTVAL (SET_SRC (curr_set)))))); + + if (!is_lui) + return false; + + /* Check for load + LUI fusion: + Load and LUI destinations must be different to avoid hazard. 
*/ + if (get_attr_type (prev) == TYPE_LOAD) + return REGNO (SET_DEST (prev_set)) != REGNO (SET_DEST (curr_set)); + + /* Check for store + LUI fusion (always allowed). */ + if (get_attr_type (prev) == TYPE_STORE) + return true; + + return false; +} + + +/* Return true if PREV and CURR should be kept together during scheduling. */ + +bool +arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) +{ + rtx prev_set = single_set (prev); + rtx curr_set = single_set (curr); + + /* Fuse multiply-add pair: + prev: (set rd_mult (mult rs1 rs2)) + curr: (set rd_add (plus rd_mult rs3)) */ + if (prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == MULT + && GET_CODE (SET_SRC (curr_set)) == PLUS) + { + rtx curr_plus = SET_SRC (curr_set); + rtx mult_dest = SET_DEST (prev_set); + unsigned int mult_dest_regno = REGNO (mult_dest); + + /* Check if multiply result is used in either operand of the addition. */ + if (REG_P (XEXP (curr_plus, 0)) + && REGNO (XEXP (curr_plus, 0)) == mult_dest_regno) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MULT_ADD (op0)\n"); + return true; + } + + if (REG_P (XEXP (curr_plus, 1)) + && REGNO (XEXP (curr_plus, 1)) == mult_dest_regno) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MULT_ADD (op1)\n"); + return true; + } + } + + /* Fuse logical shift left with logical shift right (bit-extract pattern): + prev: (set rd (ashift rs imm1)) + curr: (set rd (lshiftrt rd imm2)) */ + if (prev_set && curr_set + && GET_CODE (SET_SRC (prev_set)) == ASHIFT + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) + && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_SHIFT_BITEXTRACT\n"); + return true; + } + + /* Fuse load-immediate with a dependent conditional branch: + prev: (set rd imm) + curr: (if_then_else (cond rd ...) ...) 
*/ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && any_condjump_p (curr)) + { + if (!curr_set) + return false; + + rtx comp = XEXP (SET_SRC (curr_set), 0); + rtx prev_dest = SET_DEST (prev_set); + + if ((REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == prev_dest) + || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == prev_dest)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_BRANCH\n"); + return true; + } + return false; + } + + /* Do not fuse loads/stores before sched2. */ + if (!reload_completed || sched_fusion) + return false; + + /* prev and curr are simple SET insns i.e. no flag setting or branching. */ + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); + + /* Don't handle anything with a jump past this point. */ + if (!simple_sets_p) + return false; + + /* Fuse adjacent loads. */ + if (get_attr_type (prev) == TYPE_LOAD + && get_attr_type (curr) == TYPE_LOAD) + { + if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_ADJACENT_LOAD\n"); + return true; + } + } + + /* Fuse adjacent stores. */ + if (get_attr_type (prev) == TYPE_STORE + && get_attr_type (curr) == TYPE_STORE) + { + if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_ADJACENT_STORE\n"); + return true; + } + } + + /* Look ahead 1 insn to prioritize adjacent load/store pairs. + If curr and next form a better fusion opportunity, defer this fusion. */ + rtx_insn *next = next_insn (curr); + if (next) + { + rtx next_set = single_set (next); + + /* Defer if next instruction forms an adjacent load pair with curr. */ + if (next_set + && get_attr_type (curr) == TYPE_LOAD + && get_attr_type (next) == TYPE_LOAD + && arcv_fused_addr_p (SET_SRC (curr_set), SET_SRC (next_set), true)) + return false; + + /* Defer if next instruction forms an adjacent store pair with curr. 
*/ + if (next_set + && get_attr_type (curr) == TYPE_STORE + && get_attr_type (next) == TYPE_STORE + && arcv_fused_addr_p (SET_DEST (curr_set), SET_DEST (next_set), false)) + return false; + } + + /* Fuse a pre- or post-update memory operation: + Examples: load+add, add+load, store+add, add+store. */ + if (arcv_memop_arith_pair_p (prev, curr)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_ARITH (prev, curr)\n"); + return true; + } + if (arcv_memop_arith_pair_p (curr, prev)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_ARITH (curr, prev)\n"); + return true; + } + + /* Fuse a memory operation preceded or followed by a LUI: + Examples: load+lui, lui+load, store+lui, lui+store. */ + if (arcv_memop_lui_pair_p (prev, curr)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_LUI (prev, curr)\n"); + return true; + } + if (arcv_memop_lui_pair_p (curr, prev)) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_MEMOP_LUI (curr, prev)\n"); + return true; + } + + /* Fuse load-immediate with a store of the destination register: + prev: (set rd imm) + curr: (set (mem ...) rd) */ + if (get_attr_type (prev) == TYPE_MOVE + && get_attr_move_type (prev) == MOVE_TYPE_CONST + && get_attr_type (curr) == TYPE_STORE) + { + rtx store_src = SET_SRC (curr_set); + rtx load_dest = SET_DEST (prev_set); + + if (REG_P (store_src) && store_src == load_dest) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_STORE\n"); + return true; + } + + if (SUBREG_P (store_src) && SUBREG_REG (store_src) == load_dest) + { + if (dump_file) + fprintf (dump_file, "ARCV_FUSE_LI_STORE (subreg)\n"); + return true; + } + } + + return false; +} + +/* Initialize ARCV scheduler state at the beginning of scheduling. */ + +void +arcv_sched_init (void) +{ + sched_state.last_scheduled_insn = 0; +} + +/* Try to reorder ready queue to promote ARCV fusion opportunities. + Returns the number of instructions that can be issued this cycle. 
*/ + +int +arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) +{ + if (sched_fusion) + return sched_state.cached_can_issue_more; + + if (!sched_state.cached_can_issue_more) + return 0; + + /* Fuse double load/store instances missed by sched_fusion. */ + if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn + && ready && *n_readyp > 0 + && !SCHED_GROUP_P (sched_state.last_scheduled_insn) + && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD + || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, + ready[*n_readyp - i])) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + sched_state.pipeB_scheduled_p = 1; + return sched_state.cached_can_issue_more; + } + } + sched_state.pipeB_scheduled_p = 1; + } + + /* Try to fuse a non-memory last_scheduled_insn. 
*/ + if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p) + && sched_state.last_scheduled_insn && ready && *n_readyp > 0 + && !SCHED_GROUP_P (sched_state.last_scheduled_insn) + && (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD + && get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE)) + { + for (int i = 1; i <= *n_readyp; i++) + { + if (NONDEBUG_INSN_P (ready[*n_readyp - i]) + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && (!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))) + && arcv_macro_fusion_pair_p (sched_state.last_scheduled_insn, + ready[*n_readyp - i])) + { + if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) + if (sched_state.pipeB_scheduled_p) + continue; + else + sched_state.pipeB_scheduled_p = 1; + else if (!sched_state.alu_pipe_scheduled_p) + sched_state.alu_pipe_scheduled_p = 1; + else + sched_state.pipeB_scheduled_p = 1; + + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + SCHED_GROUP_P (ready[*n_readyp - 1]) = 1; + return sched_state.cached_can_issue_more; + } + } + sched_state.alu_pipe_scheduled_p = 1; + } + + /* When pipe B is scheduled, we can have no more memops this cycle. 
*/ + if (sched_state.pipeB_scheduled_p && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0 + && !SCHED_GROUP_P (ready[*n_readyp - 1]) + && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD + || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + { + if (sched_state.alu_pipe_scheduled_p) + return 0; + + for (int i = 2; i <= *n_readyp; i++) + { + if ((NONDEBUG_INSN_P (ready[*n_readyp - i]) + && recog_memoized (ready[*n_readyp - i]) >= 0 + && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD + && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE + && !SCHED_GROUP_P (ready[*n_readyp - i]) + && ((!next_insn (ready[*n_readyp - i]) + || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i]))))) + || ((next_insn (ready[*n_readyp - i]) + && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i])) + && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0 + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD + && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE))) + { + std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); + sched_state.alu_pipe_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; + return 1; + } + } + return 0; + } + + /* If all else fails, schedule a single instruction. */ + if (ready && *n_readyp > 0 + && NONDEBUG_INSN_P (ready[*n_readyp - 1]) + && recog_memoized (ready[*n_readyp - 1]) >= 0) + { + rtx_insn *insn = ready[*n_readyp - 1]; + enum attr_type insn_type = get_attr_type (insn); + + /* Memory operations go to pipeB if available. */ + if (!sched_state.pipeB_scheduled_p + && (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)) + { + sched_state.pipeB_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; + return 1; + } + /* Non-memory operations go to ALU pipe. 
*/ + else if (insn_type != TYPE_LOAD && insn_type != TYPE_STORE) + { + sched_state.alu_pipe_scheduled_p = 1; + sched_state.cached_can_issue_more = 1; + return 1; + } + } + + return sched_state.cached_can_issue_more; +} + +int +arcv_sched_adjust_priority (rtx_insn *insn, int priority) +{ + if (!TARGET_ARCV_RHX100) + return priority; + + if (DEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return priority; + + /* Bump the priority of fused load-store pairs for easier + scheduling of the memory pipe. The specific increase + value is determined empirically. */ + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn)) + && ((get_attr_type (insn) == TYPE_STORE + && get_attr_type (next_insn (insn)) == TYPE_STORE) + || (get_attr_type (insn) == TYPE_LOAD + && get_attr_type (next_insn (insn)) == TYPE_LOAD))) + return priority + 1; + + return priority; +} + +/* Adjust scheduling cost for ARCV fusion. */ + +int +arcv_sched_adjust_cost (rtx_insn *insn, int dep_type, int cost) +{ + if (dep_type == REG_DEP_ANTI && !SCHED_GROUP_P (insn)) + return cost + 1; + + return cost; +} + +/* If INSN is a load or store of address in the form of [base+offset], + extract the two parts and set to BASE and OFFSET. IS_LOAD is set + to TRUE if it's a load. Return TRUE if INSN is such an instruction, + otherwise return FALSE. 
*/ + +static bool +arcv_fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode, + bool *is_load) +{ + rtx x, dest, src; + + gcc_assert (INSN_P (insn)); + x = PATTERN (insn); + if (GET_CODE (x) != SET) + return false; + + src = SET_SRC (x); + dest = SET_DEST (x); + + if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) + && MEM_P (XEXP (src, 0))) + src = XEXP (src, 0); + + if (REG_P (src) && MEM_P (dest)) + { + *is_load = false; + if (extract_base_offset_in_addr (dest, base, offset)) + *mode = GET_MODE (dest); + } + else if (MEM_P (src) && REG_P (dest)) + { + *is_load = true; + if (extract_base_offset_in_addr (src, base, offset)) + *mode = GET_MODE (src); + } + else + return false; + + return (*base != NULL_RTX && *offset != NULL_RTX); +} + +void +arcv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, + int *pri) +{ + rtx base, offset; + machine_mode mode = SImode; + bool is_load; + + gcc_assert (INSN_P (insn)); + + /* Default priority for non-fusible instructions. */ + int default_pri = max_pri - 1; + + /* Check if this is a fusible load/store instruction. */ + if (!arcv_fusion_load_store (insn, &base, &offset, &mode, &is_load) + || !arcv_pair_fusion_mode_allowed_p (mode, is_load)) + { + *pri = default_pri; + *fusion_pri = default_pri; + return; + } + + /* Start with half the default priority to distinguish fusible from + non-fusible instructions. */ + int priority = default_pri / 2; + + /* Scale priority by access width - narrower accesses get lower priority. + HImode: divide by 2, QImode: divide by 4. This encourages wider + accesses to be scheduled together. */ + if (mode == HImode) + priority /= 2; + else if (mode == QImode) + priority /= 4; + + /* Factor in base register: instructions with smaller register numbers + get higher priority. The shift by 20 bits ensures this is the most + significant component of the priority. 
*/ + const int BASE_REG_SHIFT = 20; + const int BASE_REG_MASK = 0xff; + priority -= ((REGNO (base) & BASE_REG_MASK) << BASE_REG_SHIFT); + + /* Calculate fusion priority: group loads/stores with adjacent addresses + into the same scheduling group. We divide the offset by (mode_size * 2) + to group pairs of adjacent accesses, then shift left by 1 to make room + for the load/store bit. */ + int off_val = (int)(INTVAL (offset)); + int addr_group = off_val / (GET_MODE_SIZE (mode).to_constant () * 2); + *fusion_pri = priority - (addr_group << 1) + is_load; + + /* Factor in the actual offset value: instructions with smaller offsets + get higher priority. We use only the lower 20 bits to avoid overflow. */ + const int OFFSET_MASK = 0xfffff; + if (off_val >= 0) + priority -= (off_val & OFFSET_MASK); + else + priority += ((-off_val) & OFFSET_MASK); + + *pri = priority; +} + + +bool +arcv_can_issue_more_p (int more) +{ + /* Beginning of cycle - reset variables. */ + if (more == riscv_get_tune_param_issue_rate ()) + { + sched_state.alu_pipe_scheduled_p = 0; + sched_state.pipeB_scheduled_p = 0; + } + + if (sched_state.alu_pipe_scheduled_p && sched_state.pipeB_scheduled_p) + { + sched_state.cached_can_issue_more = 0; + return false; + } + + sched_state.cached_can_issue_more = more; + + return true; +} + +int +arcv_sched_variable_issue (rtx_insn *insn, int more) +{ + if (next_insn (insn) && INSN_P (next_insn (insn)) + && SCHED_GROUP_P (next_insn (insn))) + { + if (get_attr_type (insn) == TYPE_LOAD + || get_attr_type (insn) == TYPE_STORE + || get_attr_type (next_insn (insn)) == TYPE_LOAD + || get_attr_type (next_insn (insn)) == TYPE_STORE) + sched_state.pipeB_scheduled_p = 1; + else + sched_state.alu_pipe_scheduled_p = 1; + } + + if (get_attr_type (insn) == TYPE_ALU_FUSED + || get_attr_type (insn) == TYPE_IMUL_FUSED) + { + sched_state.alu_pipe_scheduled_p = 1; + more -= 1; + } + + sched_state.last_scheduled_insn = insn; + sched_state.cached_can_issue_more = more - 1; + + return 
sched_state.cached_can_issue_more; +} diff --git a/gcc/config/riscv/arcv.h b/gcc/config/riscv/arcv.h new file mode 100644 index 000000000000..f83d08d75732 --- /dev/null +++ b/gcc/config/riscv/arcv.h @@ -0,0 +1,34 @@ +/* Definition of Synopsys ARC-V processors. + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_RISCV_ARCV_H +#define GCC_RISCV_ARCV_H + +/* ARCV scheduler interface functions. 
*/ +extern bool arcv_can_issue_more_p (int); +extern int arcv_sched_variable_issue (rtx_insn *, int); +extern bool arcv_macro_fusion_pair_p (rtx_insn *, rtx_insn *); +extern void arcv_sched_init (void); +extern int arcv_sched_reorder2 (rtx_insn **, int *); +extern int arcv_sched_adjust_priority (rtx_insn *, int); +extern int arcv_sched_adjust_cost (rtx_insn *, int, int); +extern void arcv_sched_fusion_priority (rtx_insn *, int, int *, int *); + +#endif /* GCC_RISCV_ARCV_H */ + diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 35de17f76cd9..df979031cd88 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -218,6 +218,8 @@ (zero_extract "srliw")]) (define_code_attr extract_shift [(sign_extract "ashiftrt") (zero_extract "lshiftrt")]) +(define_code_attr is_zero_extract [(sign_extract "false") + (zero_extract "true")]) ;; This code iterator allows the two right shift instructions to be ;; generated from the same template. diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index cc9d5c03cb8c..3b5da61d0bc9 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -50,6 +50,8 @@ RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) +RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info) +RISCV_TUNE("arc-v-rhx-100-series", arcv_rhx100, arcv_rhx100_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 4e4e9d8930e2..632d426503be 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -61,6 +61,8 @@ enum 
riscv_microarchitecture_type { generic_ooo, mips_p8700, tt_ascalon_d8, + arcv_rmx100, + arcv_rhx100, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; @@ -85,6 +87,13 @@ enum rvv_max_lmul_enum { RVV_DYNAMIC = 9 }; +/* ARC-V multiply option. */ +enum arcv_mpy_option_enum { + ARCV_MPY_OPTION_1C = 1, + ARCV_MPY_OPTION_2C = 2, + ARCV_MPY_OPTION_10C = 8, +}; + enum riscv_multilib_select_kind { /* Select multilib by builtin way. */ select_by_builtin, diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index cdb706ab82ac..88bc33cd58fd 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -165,6 +165,9 @@ extern bool riscv_epilogue_uses (unsigned int); extern bool riscv_can_use_return_insn (void); extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode); extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_1c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); @@ -818,6 +821,7 @@ extern unsigned int th_int_get_mask (unsigned int); extern unsigned int th_int_get_save_adjustment (void); extern rtx th_int_adjust_cfi_prologue (unsigned int); extern const char *th_asm_output_opcode (FILE *asm_out_file, const char *p); + #ifdef RTX_CODE extern const char* th_mempair_output_move (rtx[4], bool, machine_mode, RTX_CODE); @@ -851,6 +855,9 @@ extern rtx riscv_prefetch_cookie (rtx, rtx); extern bool riscv_prefetch_offset_address_p (rtx, machine_mode); struct riscv_tune_param; + +extern int riscv_get_tune_param_issue_rate (void); + /* Information about one micro-arch we know about. */ struct riscv_tune_info { /* This micro-arch canonical name. 
*/ diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 63404d3d5143..fb5551cf6f16 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. If not see #include "target-def.h" #include "riscv-vector-costs.h" #include "riscv-subset.h" +#include "arcv.h" /* Target variants that support full conditional move. */ #define TARGET_COND_MOV \ @@ -291,6 +292,7 @@ enum riscv_fusion_pairs RISCV_FUSE_BFEXT = (1 << 11), RISCV_FUSE_EXPANDED_LD = (1 << 12), RISCV_FUSE_B_ALUI = (1 << 13), + RISCV_FUSE_ARCV = (1 << 14), }; /* Costs of various operations on the different architectures. */ @@ -339,6 +341,9 @@ unsigned riscv_stack_boundary; /* Whether in riscv_output_mi_thunk. */ static bool riscv_in_thunk_func = false; +/* Forward declaration for scheduler functions. */ +static bool riscv_fusion_enabled_p (enum riscv_fusion_pairs); + /* If non-zero, this is an offset to be added to SP to redefine the CFA when restoring the FP register from the stack. Only valid when generating the epilogue. */ @@ -685,6 +690,54 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = { true, /* prefer-agnostic. */ }; +/* Costs to use when optimizing for Synopsys RMX-100. 
*/ +static const struct riscv_tune_param arcv_rmx100_tune_info = { + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* int_div */ + 1, /* issue_rate */ + 4, /* branch_cost */ + 2, /* memory_cost */ + 4, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + +/* Costs to use when optimizing for Synopsys RHX-100. */ +static const struct riscv_tune_param arcv_rhx100_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */ + {COSTS_N_INSNS (27), COSTS_N_INSNS (43)}, /* int_div */ + 4, /* issue_rate */ + 9, /* branch_cost */ + 2, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_ARCV, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + /* Costs to use when optimizing for size. 
*/ static const struct riscv_tune_param optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */ @@ -854,6 +907,12 @@ typedef enum typedef insn_code (*code_for_push_pop_t) (machine_mode); +int +riscv_get_tune_param_issue_rate (void) +{ + return tune_param->issue_rate; +} + void riscv_frame_info::reset(void) { total_size = 0; @@ -4282,7 +4341,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN } gcc_fallthrough (); case SIGN_EXTRACT: - if (TARGET_XTHEADBB && outer_code == SET + if ((TARGET_ARCV_RHX100 || TARGET_XTHEADBB) + && outer_code == SET && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))) { @@ -10580,6 +10640,11 @@ riscv_issue_rate (void) static int riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) { + + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + if (!arcv_can_issue_more_p (more)) + return 0; + if (DEBUG_INSN_P (insn)) return more; @@ -10600,6 +10665,9 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) an assert so we can find and fix this problem. */ gcc_assert (insn_has_dfa_reservation_p (insn)); + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + return arcv_sched_variable_issue (insn, more); + return more - 1; } @@ -11327,9 +11395,27 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + return arcv_macro_fusion_pair_p (prev, curr); + return false; } +static void +riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri, + int *pri) +{ + if (TARGET_ARCV_RHX100) + { + arcv_sched_fusion_priority (insn, max_pri, fusion_pri, pri); + return; + } + + /* Default priority. */ + *pri = max_pri - 1; + *fusion_pri = max_pri - 1; +} + /* Adjust the cost/latency of instructions for scheduling. For now this is just used to change the latency of vector instructions according to their LMUL. 
We assume that an insn with LMUL == 8 requires @@ -11338,17 +11424,21 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) we currently only perform the adjustment when -madjust-lmul-cost is given. */ static int -riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost, - unsigned int) +riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, + int cost, unsigned int) { + /* Use ARCV-specific cost adjustment for RHX-100. */ + if (TARGET_ARCV_RHX100) + return arcv_sched_adjust_cost (insn, dep_type, cost); + /* Only do adjustments for the generic out-of-order scheduling model. */ if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo) return cost; - if (recog_memoized (insn) < 0) + if (recog_memoized (dep_insn) < 0) return cost; - enum attr_type type = get_attr_type (insn); + enum attr_type type = get_attr_type (dep_insn); if (type == TYPE_VFREDO || type == TYPE_VFWREDO) { @@ -11420,6 +11510,43 @@ riscv_sched_can_speculate_insn (rtx_insn *insn) } } +/* Implement TARGET_SCHED_ADJUST_PRIORITY hook. */ + +static int +riscv_sched_adjust_priority (rtx_insn *insn, int priority) +{ + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + return arcv_sched_adjust_priority (insn, priority); + + return priority; +} + +/* Implement TARGET_SCHED_INIT hook. */ + +static void +riscv_sched_init (FILE *file ATTRIBUTE_UNUSED, + int verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + arcv_sched_init (); +} + +/* Implement TARGET_SCHED_REORDER2 hook. */ + +static int +riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED, + int verbose ATTRIBUTE_UNUSED, + rtx_insn **ready, + int *n_readyp, + int clock ATTRIBUTE_UNUSED) +{ + if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV)) + return arcv_sched_reorder2 (ready, n_readyp); + + return 0; +} + /* Auxiliary function to emit RISC-V ELF attribute. 
*/ static void riscv_emit_attribute () @@ -16011,6 +16138,9 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_MACRO_FUSION_PAIR_P #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p +#undef TARGET_SCHED_FUSION_PRIORITY +#define TARGET_SCHED_FUSION_PRIORITY riscv_sched_fusion_priority + #undef TARGET_SCHED_VARIABLE_ISSUE #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue @@ -16020,6 +16150,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_CAN_SPECULATE_INSN #define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn +#undef TARGET_SCHED_ADJUST_PRIORITY +#define TARGET_SCHED_ADJUST_PRIORITY riscv_sched_adjust_priority + +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 riscv_sched_reorder2 + +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT riscv_sched_init + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index a0ad75c765a1..f2910877544b 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -971,6 +971,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use); || (riscv_microarchitecture == sifive_p400) \ || (riscv_microarchitecture == sifive_p600)) +/* True if the target is ARC-V RHX100. */ +#define TARGET_ARCV_RHX100 \ + (riscv_microarchitecture == arcv_rhx100) + /* True if the target supports misaligned vector loads and stores. 
*/ #define TARGET_VECTOR_MISALIGN_SUPPORTED \ riscv_vector_unaligned_access_p diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 640ca5f9b0ea..f52f4eabd6ef 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -518,7 +518,7 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16, + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,imul_fused,alu_fused, sf_vc,sf_vc_se" (cond [(eq_attr "got" "load") (const_string "load") @@ -672,7 +672,7 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100,arcv_rhx100" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. 
@@ -3100,6 +3100,7 @@ ;; * Single-bit extraction (SFB) ;; * Extraction instruction th.ext(u) (XTheadBb) ;; * lshrsi3_extend_2 (see above) +;; * Zero extraction fusion (ARC-V) (define_insn_and_split "*3" [(set (match_operand:GPR 0 "register_operand" "=r") (any_extract:GPR @@ -3112,6 +3113,8 @@ && (INTVAL (operands[2]) == 1)) && !TARGET_XTHEADBB && !TARGET_XANDESPERF + && !(TARGET_ARCV_RHX100 + && ) && !(TARGET_64BIT && (INTVAL (operands[3]) > 0) && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))" @@ -4501,7 +4504,63 @@ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand")) (sign_extend:SI (match_operand:HI 2 "register_operand"))) (match_operand:SI 3 "register_operand")))] - "TARGET_XTHEADMAC" + "TARGET_XTHEADMAC || (TARGET_ARCV_RHX100 + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))" + { + if (TARGET_ARCV_RHX100) + { + rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_extendhisi2 (tmp0, operands[1])); + emit_insn (gen_extendhisi2 (tmp1, operands[2])); + + if (TARGET_64BIT) + { + rtx op0 = gen_reg_rtx (DImode); + emit_insn (gen_madd_split_fused_extended (op0, tmp0, tmp1, operands[3])); + op0 = gen_lowpart (SImode, op0); + SUBREG_PROMOTED_VAR_P (op0) = 1; + SUBREG_PROMOTED_SET (op0, SRP_SIGNED); + emit_move_insn (operands[0], op0); + } + else + { + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + } + + DONE; + } + } +) + +(define_expand "umaddhisi4" + [(set (match_operand:SI 0 "register_operand") + (plus:SI + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand")) + (zero_extend:SI (match_operand:HI 2 "register_operand"))) + (match_operand:SI 3 "register_operand")))] + "TARGET_ARCV_RHX100 + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" + { + rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_zero_extendhisi2 (tmp0, operands[1])); + emit_insn (gen_zero_extendhisi2 (tmp1, operands[2])); + + if (TARGET_64BIT) + { + rtx op0 = gen_reg_rtx (DImode); + 
emit_insn (gen_madd_split_fused_extended (op0, tmp0, tmp1, operands[3])); + op0 = gen_lowpart (SImode, op0); + SUBREG_PROMOTED_VAR_P (op0) = 1; + SUBREG_PROMOTED_SET (op0, SRP_SIGNED); + emit_move_insn (operands[0], op0); + } + else + { + emit_insn (gen_madd_split_fused (operands[0], tmp0, tmp1, operands[3])); + } + + DONE; + } ) (define_expand "msubhisi4" @@ -4513,6 +4572,68 @@ "TARGET_XTHEADMAC" ) +(define_insn "madd_split_fused" + [(set (match_operand:SI 0 "register_operand" "=&r,r") + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "register_operand" "r,r")) + (match_operand:SI 3 "register_operand" "r,?0"))) + (clobber (match_scratch:SI 4 "=&r,&r"))] + "TARGET_ARCV_RHX100 + && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)" + { + if (REGNO (operands[0]) == REGNO (operands[3])) + { + return "mul\t%4,%1,%2\n\tadd\t%4,%3,%4\n\tmv\t%0,%4"; + } + else + { + return "mul\t%0,%1,%2\n\tadd\t%0,%0,%3"; + } + } + [(set_attr "type" "imul_fused")] +) + +(define_insn "madd_split_fused_extended" + [(set (match_operand:DI 0 "register_operand" "=&r,r") + (sign_extend:DI + (plus:SI + (mult:SI (match_operand:SI 1 "register_operand" "r,r") + (match_operand:SI 2 "register_operand" "r,r")) + (match_operand:SI 3 "register_operand" "r,?0")))) + (clobber (match_scratch:SI 4 "=&r,&r"))] + "TARGET_ARCV_RHX100 + && (TARGET_ZMMUL || TARGET_MUL)" + { + if (REGNO (operands[0]) == REGNO (operands[3])) + { + return "mulw\t%4,%1,%2\n\taddw\t%4,%3,%4\n\tmv\t%0,%4"; + } + else + { + return "mulw\t%0,%1,%2\n\taddw\t%0,%0,%3"; + } + } + [(set_attr "type" "imul_fused")] +) + +(define_insn "*zero_extract_fused" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extract:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")))] + "TARGET_ARCV_RHX100 && !TARGET_64BIT + && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)" + { + int amount = INTVAL (operands[2]); + int end = INTVAL 
(operands[3]) + amount; + operands[2] = GEN_INT (BITS_PER_WORD - end); + operands[3] = GEN_INT (BITS_PER_WORD - amount); + return "slli\t%0,%1,%2\n\tsrli\t%0,%0,%3"; + } + [(set_attr "type" "alu_fused")] +) + ;; String compare with length insn. ;; Argument 0 is the target (result) ;; Argument 1 is the source1 @@ -4966,3 +5087,5 @@ (include "generic-vector-ooo.md") (include "generic-ooo.md") (include "tt-ascalon-d8.md") +(include "arcv-rmx100.md") +(include "arcv-rhx100.md") diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 6543fd1c4a72..663acf62dac4 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -396,3 +396,20 @@ Specifies whether the fence.tso instruction should be used. mautovec-segment Target Integer Var(riscv_mautovec_segment) Init(1) Enable (default) or disable generation of vector segment load/store instructions. + +Enum +Name(arcv_mpy_option) Type(enum arcv_mpy_option_enum) +Valid arguments to -param=arcv_mpy_option=: + +EnumValue +Enum(arcv_mpy_option) String(1c) Value(ARCV_MPY_OPTION_1C) + +EnumValue +Enum(arcv_mpy_option) String(2c) Value(ARCV_MPY_OPTION_2C) + +EnumValue +Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C) + +-param=arcv-mpy-option= +Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C) +The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=rmx100) (default: 2c). 
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index b53a2dff2cf7..1a25ef54bdb9 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -181,6 +181,16 @@ riscv-zicfilp.o: $(srcdir)/config/riscv/riscv-zicfilp.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-zicfilp.cc +arcv.o: $(srcdir)/config/riscv/arcv.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(BACKEND_H) $(TARGET_H) $(RTL_H) \ + $(TREE_H) memmodel.h $(TM_H) $(OPTABS_H) $(REGS_H) $(EMIT_RTL_H) \ + $(RECOG_H) $(DIAGNOSTIC_CORE_H) stor-layout.h $(ALIAS_H) fold-const.h \ + output.h $(INSN_ATTR_H) $(FLAGS_H) explow.h $(CALLS_H) varasm.h \ + $(EXPR_H) tm-constrs.h $(TM_P_H) $(DF_H) reload.h sched-int.h \ + $(srcdir)/config/riscv/arcv.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/riscv/arcv.cc + PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def $(common_out_file): $(srcdir)/config/riscv/riscv-cores.def \ diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi index a2a4d3e77dbb..8ffb3db906fe 100644 --- a/gcc/doc/riscv-mtune.texi +++ b/gcc/doc/riscv-mtune.texi @@ -50,6 +50,10 @@ particular CPU name. 
Permissible values for this option are: @samp{xiangshan-kunminghu}, +@samp{arc-v-rmx-100-series}, + +@samp{arc-v-rhx-100-series}, + @samp{generic-ooo}, @samp{size}, diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c new file mode 100644 index 000000000000..cc2a56a2e086 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-limm-condbr.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=arc-v-rhx-100-series" } */ + +int +f (int x) +{ + begin: + if (x <= 3) + goto begin; +} + +/* { dg-final { scan-assembler "\\sli\\sa5,3\n\\sble\\sa0,a5,.L\[0-9\]+\n" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c new file mode 100644 index 000000000000..eb8665f576c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-madd.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv32 } */ +/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */ +/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im -mabi=ilp32" } */ + +int +f (int x, int y, int z, int v, int w) +{ + return x + y * z + v * w; +} + +/* { dg-final { scan-assembler {\smul\s([ast][0-9]+),a1,a2\n\sadd\s\1,\1,a0\n\smul\sa0,a3,a4\n\sadd\sa0,a0,\1\n} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c new file mode 100644 index 000000000000..7abf54ec1448 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-fusion-xbfu.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv32 } */ +/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */ +/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32 -dp" } */ + +#define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt)))) + +int +f (int x) +{ + return bit_extract(x,10,14) + bit_extract(x,1,1); +} + +/* { dg-final { 
scan-assembler {\sslli\s([ast][0-9]+),a0,8.*zero_extract_fused\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1.*\n\sadd\sa0,\2,a0.*\n} } } */