Skip to content

Commit aee1f34

Browse files
committed
arcv: apex: Add LTO support for APEX intrinsics.
APEX (ARC Processor Extension) intrinsics are registered dynamically via #pragma intrinsic directives rather than being statically defined. This creates a challenge for LTO where intrinsic definitions from different translation units must be preserved and made available during link-time optimization. This patch implements LTO serialization for APEX intrinsics by: 1. Creating a dedicated .gnu.lto_riscv_apex section to store APEX intrinsic metadata (name, mnemonic, opcode, instruction formats) 2. Writing all registered APEX intrinsics during the compilation phase 3. Reading and re-registering all APEX intrinsics during the LTO phase 4. Integrating with the LTO streamer infrastructure Without this support, LTO would lose APEX intrinsic definitions, causing "unavailable intrinsics" errors during link-time optimization. Signed-off-by: Luis Silva <[email protected]>
1 parent 37970bf commit aee1f34

File tree

9 files changed

+443
-1
lines changed

9 files changed

+443
-1
lines changed

gcc/config.gcc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ riscv*)
557557
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o"
558558
extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o riscv-avlprop.o"
559559
extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o sifive-vector-builtins-bases.o"
560-
extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o"
560+
extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-apex-lto.o"
561561
d_target_objs="riscv-d.o"
562562
extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h riscv_th_vector.h sifive_vector.h"
563563
target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc"

gcc/config/riscv/riscv-apex-lto.cc

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
/* LTO serialization for RISC-V APEX intrinsics.
2+
Copyright (C) 2025 Free Software Foundation, Inc.
3+
4+
This file is part of GCC.
5+
6+
GCC is free software; you can redistribute it and/or modify it under
7+
the terms of the GNU General Public License as published by the Free
8+
Software Foundation; either version 3, or (at your option) any later
9+
version.
10+
11+
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12+
WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with GCC; see the file COPYING3. If not see
18+
<http://www.gnu.org/licenses/>. */
19+
20+
/* RISC-V APEX (ARC Processor Extension) intrinsics are unique in GCC
21+
because they are registered dynamically at compile-time via #pragma intrinsic
22+
directives, rather than being statically defined like normal target builtins.
23+
24+
This creates a challenge for LTO (Link Time Optimization): when compiling
25+
with -flto, each translation unit may register different APEX intrinsics via
26+
pragmas. During the link-time optimization phase, all these intrinsic
27+
definitions must be preserved and made available for code generation.
28+
29+
This file implements LTO serialization support for APEX intrinsics by:
30+
31+
1. Writing Phase (produce_asm_for_decls):
32+
- Iterates through all registered APEX intrinsics
33+
- Serializes their metadata (name, mnemonic, opcode, instruction formats)
34+
- Writes to a dedicated .gnu.lto_riscv_apex section in object files
35+
36+
2. Reading Phase (read_cgraph_and_symbols):
37+
- Reads .gnu.lto_riscv_apex sections from all input object files
38+
- Reconstructs and re-registers all APEX intrinsics
39+
- Makes them available for optimization and code generation
40+
41+
Without this support, LTO would lose APEX intrinsic definitions, causing
42+
unavailable intrinsics errors during link-time optimization. */
43+
44+
#include "config.h"
45+
#include "system.h"
46+
#include "coretypes.h"
47+
#include "backend.h"
48+
#include "tree.h"
49+
#include "gimple.h"
50+
#include "cgraph.h"
51+
#include "lto-streamer.h"
52+
#include "ipa-utils.h"
53+
#include "data-streamer.h"
54+
#include "stringpool.h"
55+
#include "attribs.h"
56+
57+
/* Declarations from riscv-builtins.cc for accessing
58+
APEX builtin information. */
59+
extern int arcv_apex_get_builtin_count (void);
60+
extern void arcv_apex_get_builtin_info (int, const char **, const char **,
61+
unsigned int *, unsigned int *);
62+
extern void arcv_apex_lto_register_builtin (const char *, const char *,
63+
unsigned int, unsigned int, bool,
64+
tree);
65+
extern const char *arcv_apex_get_fn_name (unsigned int);
66+
67+
/* Write RISC-V APEX intrinsic information to the LTO bytecode stream.
68+
69+
This function is called during the compilation phase when producing LTO
70+
bytecode. It serializes all APEX intrinsics that were registered via
71+
#pragma directives in the current translation unit.
72+
73+
The serialization format for each intrinsic is:
74+
- Function name length (uhwi)
75+
- Function name characters
76+
- Instruction name length (uhwi)
77+
- Instruction name characters
78+
- Opcode (uhwi)
79+
- Instruction format flags (uhwi) */
80+
81+
void
82+
arcv_apex_lto_write_section (void)
83+
{
84+
/* Get the number of registered APEX builtins in this compilation unit. */
85+
int apex_count = arcv_apex_get_builtin_count ();
86+
87+
/* If no APEX builtins were registered via pragmas, skip section creation.
88+
This is common for translation units that don't use APEX intrinsics. */
89+
if (apex_count == 0)
90+
return;
91+
92+
/* Collect indices of intrinsics that are actually used and not optimized
93+
away. Use an auto_vec to avoid manual memory management. */
94+
auto_vec<int> used_indices;
95+
for (int i = 0; i < apex_count; i++)
96+
{
97+
const char *fn_name = arcv_apex_get_fn_name (i);
98+
gcc_assert (fn_name);
99+
100+
/* Check if the intrinsic is still referenced in the program. */
101+
symtab_node *snode = symtab_node::get_for_asmname (
102+
get_identifier (fn_name));
103+
104+
/* Only keep intrinsics that exist and are actually used.
105+
Check if the symbol is referred to anywhere in the program. */
106+
if (snode && snode->referred_to_p ())
107+
used_indices.safe_push (i);
108+
}
109+
110+
/* If all intrinsics were optimized away, skip section creation. */
111+
if (used_indices.is_empty ())
112+
return;
113+
114+
/* Create a new LTO section for APEX intrinsics. */
115+
struct lto_simple_output_block *ob
116+
= lto_create_simple_output_block (LTO_section_riscv_apex);
117+
118+
if (!ob)
119+
return;
120+
121+
/* Write the number of used APEX builtins so the reader knows
122+
how many to expect. */
123+
streamer_write_uhwi_stream (ob->main_stream, used_indices.length ());
124+
125+
/* Serialize only the intrinsics that are still used. */
126+
for (unsigned int idx = 0; idx < used_indices.length (); idx++)
127+
{
128+
int i = used_indices[idx];
129+
const char *fn_name = NULL;
130+
const char *insn_name = NULL;
131+
unsigned int opcode = 0;
132+
unsigned int insn_formats = 0;
133+
134+
/* Get builtin information from the registry. */
135+
arcv_apex_get_builtin_info (i, &fn_name, &insn_name,
136+
&opcode, &insn_formats);
137+
138+
/* Function and instruction names must exist. */
139+
gcc_assert (fn_name && insn_name);
140+
141+
/* Write function name as length-prefixed string. */
142+
size_t name_len = strlen (fn_name);
143+
streamer_write_uhwi_stream (ob->main_stream, name_len);
144+
for (size_t j = 0; j < name_len; j++)
145+
streamer_write_char_stream (ob->main_stream, fn_name[j]);
146+
147+
/* Write instruction name as length-prefixed string. */
148+
size_t insn_name_len = strlen (insn_name);
149+
streamer_write_uhwi_stream (ob->main_stream, insn_name_len);
150+
for (size_t j = 0; j < insn_name_len; j++)
151+
streamer_write_char_stream (ob->main_stream, insn_name[j]);
152+
153+
/* Write opcode value. */
154+
streamer_write_uhwi_stream (ob->main_stream, opcode);
155+
156+
/* Write instruction format flags. */
157+
streamer_write_uhwi_stream (ob->main_stream, insn_formats);
158+
}
159+
160+
lto_destroy_simple_output_block (ob);
161+
}
162+
163+
/* Read RISC-V APEX intrinsic information from the LTO bytecode stream.
164+
165+
This function is called during the link-time optimization phase. It reads
166+
the .gnu.lto_riscv_apex sections from all input object files and
167+
re-registers all APEX intrinsics so they are available for optimization
168+
and code generation in the LTRANS phase.
169+
170+
The function iterates over all input files, reads their APEX sections,
171+
and re-registers each intrinsic by calling riscv_register_apex_builtin. */
172+
173+
void
174+
arcv_apex_lto_read_section (void)
175+
{
176+
struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
177+
struct lto_file_decl_data *file_data;
178+
unsigned int j = 0;
179+
180+
/* Process each input file's APEX section. */
181+
while ((file_data = file_data_vec[j++]))
182+
{
183+
const char *data;
184+
size_t len;
185+
class lto_input_block *ib
186+
= lto_create_simple_input_block (file_data, LTO_section_riscv_apex,
187+
&data, &len);
188+
189+
/* Skip files that don't have an APEX section
190+
(did not use APEX intrinsics). */
191+
if (!ib)
192+
continue;
193+
194+
/* Read the count of APEX builtins in this file. */
195+
unsigned int apex_count = streamer_read_uhwi (ib);
196+
unsigned int registered_count = 0;
197+
198+
/* Deserialize each APEX intrinsic. */
199+
for (unsigned int i = 0; i < apex_count; i++)
200+
{
201+
/* Read function name. */
202+
unsigned int fn_name_len = streamer_read_uhwi (ib);
203+
char *fn_name = XNEWVEC (char, fn_name_len + 1);
204+
for (unsigned int k = 0; k < fn_name_len; k++)
205+
fn_name[k] = streamer_read_uchar (ib);
206+
fn_name[fn_name_len] = '\0';
207+
208+
/* Read instruction name. */
209+
unsigned int insn_name_len = streamer_read_uhwi (ib);
210+
char *insn_name = XNEWVEC (char, insn_name_len + 1);
211+
for (unsigned int k = 0; k < insn_name_len; k++)
212+
insn_name[k] = streamer_read_uchar (ib);
213+
insn_name[insn_name_len] = '\0';
214+
215+
/* Read opcode and instruction format flags. */
216+
unsigned int opcode = streamer_read_uhwi (ib);
217+
unsigned int insn_formats = streamer_read_uhwi (ib);
218+
219+
/* Look up the function declaration in the merged symbol table.
220+
During LTO, all function declarations from all compilation units
221+
are merged into a single global symbol table. */
222+
symtab_node *snode = symtab_node::get_for_asmname (
223+
get_identifier (fn_name));
224+
225+
cgraph_node *node = dyn_cast<cgraph_node *> (snode);
226+
if (node)
227+
{
228+
tree fndecl = node->decl;
229+
if (fndecl && TREE_CODE (fndecl) == FUNCTION_DECL)
230+
{
231+
/* Re-register the intrinsic so it's available for code generation.
232+
The !flag_wpa parameter controls whether to print .extInstruction
233+
directives (only needed in final LTRANS phase, not WPA phase). */
234+
arcv_apex_lto_register_builtin (fn_name, insn_name, opcode,
235+
insn_formats, !flag_wpa, fndecl);
236+
registered_count++;
237+
}
238+
}
239+
240+
/* Free allocated memory. */
241+
XDELETEVEC (fn_name);
242+
XDELETEVEC (insn_name);
243+
}
244+
245+
/* Verify we successfully re-registered all APEX intrinsics
246+
from the section. If this fails, the LTO section is
247+
likely corrupted. */
248+
gcc_assert (registered_count == apex_count);
249+
250+
lto_destroy_simple_input_block (file_data, LTO_section_riscv_apex,
251+
ib, data, len);
252+
}
253+
}
254+

0 commit comments

Comments
 (0)