@@ -324,6 +324,11 @@ class Vectorizer {
324324 Instruction *ChainElem, Instruction *ChainBegin,
325325 const DenseMap<Instruction *, APInt /* OffsetFromLeader*/ > &ChainOffsets);
326326
327+ // / Merges the equivalence classes if they have underlying objects that differ
328+ // / by one level of indirection (i.e., one is a getelementptr and the other is
329+ // / the base pointer in that getelementptr).
330+ void mergeEquivalenceClasses (EquivalenceClassMap &EQClasses) const ;
331+
327332 // / Collects loads and stores grouped by "equivalence class", where:
328333 // / - all elements in an eq class are a load or all are a store,
329334 // / - they all load/store the same element size (it's OK to have e.g. i8 and
@@ -1305,6 +1310,128 @@ std::optional<APInt> Vectorizer::getConstantOffsetSelects(
13051310 return std::nullopt ;
13061311}
13071312
1313+ void Vectorizer::mergeEquivalenceClasses (EquivalenceClassMap &EQClasses) const {
1314+ if (EQClasses.size () < 2 ) // There is nothing to merge.
1315+ return ;
1316+
1317+ // The reduced key has all elements of the ECClassKey except the underlying
1318+ // object. Check that EqClassKey has 4 elements and define the reduced key.
1319+ static_assert (std::tuple_size_v<EqClassKey> == 4 ,
1320+ " EqClassKey has changed - EqClassReducedKey needs changes too" );
1321+ using EqClassReducedKey =
1322+ std::tuple<std::tuple_element_t <1 , EqClassKey> /* AddrSpace */ ,
1323+ std::tuple_element_t <2 , EqClassKey> /* Element size */ ,
1324+ std::tuple_element_t <3 , EqClassKey> /* IsLoad; */ >;
1325+ using ECReducedKeyToUnderlyingObjectMap =
1326+ MapVector<EqClassReducedKey,
1327+ SmallPtrSet<std::tuple_element_t <0 , EqClassKey>, 4 >>;
1328+
1329+ // Form a map from the reduced key (without the underlying object) to the
1330+ // underlying objects: 1 reduced key to many underlying objects, to form
1331+ // groups of potentially merge-able equivalence classes.
1332+ ECReducedKeyToUnderlyingObjectMap RedKeyToUOMap;
1333+ bool FoundPotentiallyOptimizableEC = false ;
1334+ for (const auto &EC : EQClasses) {
1335+ const auto &Key = EC.first ;
1336+ EqClassReducedKey RedKey{std::get<1 >(Key), std::get<2 >(Key),
1337+ std::get<3 >(Key)};
1338+ RedKeyToUOMap[RedKey].insert (std::get<0 >(Key));
1339+ if (RedKeyToUOMap[RedKey].size () > 1 )
1340+ FoundPotentiallyOptimizableEC = true ;
1341+ }
1342+ if (!FoundPotentiallyOptimizableEC)
1343+ return ;
1344+
1345+ LLVM_DEBUG ({
1346+ dbgs () << " LSV: mergeEquivalenceClasses: before merging:\n " ;
1347+ for (const auto &EC : EQClasses) {
1348+ dbgs () << " Key: ([" << std::get<0 >(EC.first )
1349+ << " ]: " << *std::get<0 >(EC.first ) << " , " << std::get<1 >(EC.first )
1350+ << " , " << std::get<2 >(EC.first ) << " , "
1351+ << static_cast <int >(std::get<3 >(EC.first )) << " )\n " ;
1352+ for (const auto &Inst : EC.second )
1353+ dbgs () << " \t Inst:\t " << *Inst << " \n " ;
1354+ }
1355+ });
1356+ LLVM_DEBUG ({
1357+ dbgs () << " LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n " ;
1358+ for (const auto &RedKeyToUO : RedKeyToUOMap) {
1359+ dbgs () << " Reduced key: (" << std::get<0 >(RedKeyToUO.first ) << " , "
1360+ << std::get<1 >(RedKeyToUO.first ) << " , "
1361+ << static_cast <int >(std::get<2 >(RedKeyToUO.first )) << " ) --> "
1362+ << RedKeyToUO.second .size () << " underlying objects:\n " ;
1363+ for (auto UObject : RedKeyToUO.second )
1364+ dbgs () << " [" << UObject << " ]: " << *UObject << " \n " ;
1365+ }
1366+ });
1367+
1368+ using UObjectToUObjectMap = DenseMap<const Value *, const Value *>;
1369+
1370+ // Compute the ultimate targets for a set of underlying objects.
1371+ auto GetUltimateTargets =
1372+ [](SmallPtrSetImpl<const Value *> &UObjects) -> UObjectToUObjectMap {
1373+ UObjectToUObjectMap IndirectionMap;
1374+ for (const auto *UObject : UObjects) {
1375+ const unsigned MaxLookupDepth = 1 ; // look for 1-level indirections only
1376+ const auto *UltimateTarget =
1377+ llvm::getUnderlyingObject (UObject, MaxLookupDepth);
1378+ if (UltimateTarget != UObject)
1379+ IndirectionMap[UObject] = UltimateTarget;
1380+ }
1381+ UObjectToUObjectMap UltimateTargetsMap;
1382+ for (const auto *UObject : UObjects) {
1383+ auto Target = UObject;
1384+ auto It = IndirectionMap.find (Target);
1385+ for (; It != IndirectionMap.end (); It = IndirectionMap.find (Target))
1386+ Target = It->second ;
1387+ UltimateTargetsMap[UObject] = Target;
1388+ }
1389+ return UltimateTargetsMap;
1390+ };
1391+
1392+ // For each item in RedKeyToUOMap, if it has more than one underlying object,
1393+ // try to merge the equivalence classes.
1394+ for (auto &RedKeyToUO : RedKeyToUOMap) {
1395+ auto UObjects = RedKeyToUO.second ;
1396+ if (UObjects.size () < 2 )
1397+ continue ;
1398+ const auto RedKey = RedKeyToUO.first ;
1399+ auto UTMap = GetUltimateTargets (UObjects);
1400+ for (const auto &UT : UTMap) {
1401+ const Value *UObject = UT.first ;
1402+ const Value *UltimateTarget = UT.second ;
1403+ if (UObject == UltimateTarget)
1404+ continue ;
1405+
1406+ EqClassKey KeyFrom{UObject, std::get<0 >(RedKey), std::get<1 >(RedKey),
1407+ std::get<2 >(RedKey)};
1408+ EqClassKey KeyTo{UltimateTarget, std::get<0 >(RedKey), std::get<1 >(RedKey),
1409+ std::get<2 >(RedKey)};
1410+ auto VecFrom = EQClasses[KeyFrom];
1411+ auto VecTo = EQClasses[KeyTo];
1412+ SmallVector<Instruction *, 8 > MergedVec;
1413+ std::merge (VecFrom.begin (), VecFrom.end (), VecTo.begin (), VecTo.end (),
1414+ std::back_inserter (MergedVec),
1415+ [](Instruction *A, Instruction *B) {
1416+ return A && B && A->comesBefore (B);
1417+ });
1418+ EQClasses[KeyTo] = std::move (MergedVec);
1419+ EQClasses.erase (KeyFrom);
1420+ }
1421+ }
1422+ LLVM_DEBUG ({
1423+ dbgs () << " LSV: mergeEquivalenceClasses: after merging:\n " ;
1424+ for (const auto &EC : EQClasses) {
1425+ dbgs () << " Key: ([" << std::get<0 >(EC.first )
1426+ << " ]: " << *std::get<0 >(EC.first ) << " , " << std::get<1 >(EC.first )
1427+ << " , " << std::get<2 >(EC.first ) << " , "
1428+ << static_cast <int >(std::get<3 >(EC.first )) << " )\n " ;
1429+ for (const auto &Inst : EC.second )
1430+ dbgs () << " \t Inst:\t " << *Inst << " \n " ;
1431+ }
1432+ });
1433+ }
1434+
13081435EquivalenceClassMap
13091436Vectorizer::collectEquivalenceClasses (BasicBlock::iterator Begin,
13101437 BasicBlock::iterator End) {
@@ -1377,6 +1504,7 @@ Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
13771504 .emplace_back (&I);
13781505 }
13791506
1507+ mergeEquivalenceClasses (Ret);
13801508 return Ret;
13811509}
13821510
0 commit comments