1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
|
https://gcc.gnu.org/PR116878
https://gcc.gnu.org/PR116616
From b9883d33e6338b1a44247b3f8772a5e5d2757593 Mon Sep 17 00:00:00 2001
Message-ID: <b9883d33e6338b1a44247b3f8772a5e5d2757593.1727650422.git.sam@gentoo.org>
From: Filip Kastl <fkastl@suse.cz>
Date: Tue, 10 Sep 2024 19:22:27 +0200
Subject: [PATCH] gimple ssa: Don't use __builtin_popcount in switch exp
transform
Switch exponential transformation in the switch conversion pass
currently generates
tmp1 = __builtin_popcount (var);
tmp2 = tmp1 == 1;
when inserting code to determine if var is power of two. If the target
doesn't support expanding the builtin as special instructions switch
conversion relies on this whole pattern being expanded as bitmagic.
However, it is possible that other GIMPLE optimizations move the two
statements of the pattern apart. In that case the builtin becomes a
libgcc call in the final binary. The call is slow and in case of
freestanding programs can result in linking error (this bug was
originally found while compiling Linux kernel).
This patch modifies switch conversion to insert the bitmagic
(var ^ (var - 1)) > (var - 1) instead of the builtin.
gcc/ChangeLog:
PR tree-optimization/116616
* tree-switch-conversion.cc (can_pow2p): Remove this function.
(gen_pow2p): Generate bitmagic instead of a builtin. Remove the
TYPE parameter.
(switch_conversion::is_exp_index_transform_viable): Don't call
can_pow2p.
(switch_conversion::exp_index_transform): Call gen_pow2p without
the TYPE parameter.
* tree-switch-conversion.h: Remove
m_exp_index_transform_pow2p_type.
gcc/testsuite/ChangeLog:
PR tree-optimization/116616
* gcc.target/i386/switch-exp-transform-1.c: Don't test for
presence of the POPCOUNT internal fn call.
Signed-off-by: Filip Kastl <fkastl@suse.cz>
---
.../gcc.target/i386/switch-exp-transform-1.c | 7 +-
gcc/tree-switch-conversion.cc | 84 +++++--------------
gcc/tree-switch-conversion.h | 6 +-
3 files changed, 23 insertions(+), 74 deletions(-)
diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
index a8c9e03e515f..4832f5b52c33 100644
--- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
@@ -1,10 +1,8 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul -mpopcnt -mbmi" } */
+/* { dg-options "-O2 -fdump-tree-switchconv -mbmi" } */
/* Checks that exponential index transform enables switch conversion to convert
- this switch into an array lookup. Also checks that the "index variable is a
- power of two" check has been generated and that it has been later expanded
- into an internal function. */
+ this switch into an array lookup. */
int foo(unsigned bar)
{
@@ -30,4 +28,3 @@ int foo(unsigned bar)
}
/* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
-/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index c1332a260943..00426d400006 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -133,75 +133,33 @@ gen_log2 (tree op, location_t loc, tree *result, tree type)
return stmts;
}
-/* Is it possible to efficiently check that a value of TYPE is a power of 2?
-
- If yes, returns TYPE. If no, returns NULL_TREE. May also return another
- type. This indicates that logarithm of the variable can be computed but
- only after it is converted to this type.
-
- Also see gen_pow2p. */
-
-static tree
-can_pow2p (tree type)
-{
- /* __builtin_popcount supports the unsigned type or its long and long long
- variants. Choose the smallest out of those that can still fit TYPE. */
- int prec = TYPE_PRECISION (type);
- int i_prec = TYPE_PRECISION (unsigned_type_node);
- int li_prec = TYPE_PRECISION (long_unsigned_type_node);
- int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node);
-
- if (prec <= i_prec)
- return unsigned_type_node;
- else if (prec <= li_prec)
- return long_unsigned_type_node;
- else if (prec <= lli_prec)
- return long_long_unsigned_type_node;
- else
- return NULL_TREE;
-}
-
-/* Build a sequence of gimple statements checking that OP is a power of 2. Use
- special optabs if target supports them. Return the result as a
- boolean_type_node ssa name through RESULT. Assumes that OP's value will
- be non-negative. The generated check may give arbitrary answer for negative
- values.
-
- Before computing the check, OP may have to be converted to another type.
- This should be specified in TYPE. Use can_pow2p to decide what this type
- should be.
-
- Should only be used if can_pow2p returns true for type of OP. */
+/* Build a sequence of gimple statements checking that OP is a power of 2.
+ Return the result as a boolean_type_node ssa name through RESULT. Assumes
+ that OP's value will be non-negative. The generated check may give
+ arbitrary answer for negative values. */
static gimple_seq
-gen_pow2p (tree op, location_t loc, tree *result, tree type)
+gen_pow2p (tree op, location_t loc, tree *result)
{
gimple_seq stmts = NULL;
gimple_stmt_iterator gsi = gsi_last (stmts);
- built_in_function fn;
- if (type == unsigned_type_node)
- fn = BUILT_IN_POPCOUNT;
- else if (type == long_unsigned_type_node)
- fn = BUILT_IN_POPCOUNTL;
- else
- {
- fn = BUILT_IN_POPCOUNTLL;
- gcc_checking_assert (type == long_long_unsigned_type_node);
- }
+ tree type = TREE_TYPE (op);
+ tree utype = unsigned_type_for (type);
- tree orig_type = TREE_TYPE (op);
+ /* Build (op ^ (op - 1)) > (op - 1). */
tree tmp1;
- if (type != orig_type)
- tmp1 = gimple_convert (&gsi, false, GSI_NEW_STMT, loc, type, op);
- else
+ if (types_compatible_p (type, utype))
tmp1 = op;
- /* Build __builtin_popcount{l,ll} (op) == 1. */
- tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc,
- as_combined_fn (fn), integer_type_node, tmp1);
- *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
- boolean_type_node, tmp2,
- build_one_cst (integer_type_node));
+ else
+ tmp1 = gimple_convert (&gsi, false, GSI_NEW_STMT, loc, utype, op);
+ tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR, utype,
+ tmp1, build_one_cst (utype));
+ tree tmp3 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, BIT_XOR_EXPR,
+ utype, tmp1, tmp2);
+ *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, GT_EXPR,
+ boolean_type_node, tmp3, tmp2);
+
return stmts;
}
@@ -371,9 +329,6 @@ switch_conversion::is_exp_index_transform_viable (gswitch *swtch)
m_exp_index_transform_log2_type = can_log2 (index_type, opt_type);
if (!m_exp_index_transform_log2_type)
return false;
- m_exp_index_transform_pow2p_type = can_pow2p (index_type);
- if (!m_exp_index_transform_pow2p_type)
- return false;
/* Check that each case label corresponds only to one value
(no case 1..3). */
@@ -467,8 +422,7 @@ switch_conversion::exp_index_transform (gswitch *swtch)
new_edge2->probability = profile_probability::even ();
tree tmp;
- gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp,
- m_exp_index_transform_pow2p_type);
+ gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp);
gsi = gsi_last_bb (cond_bb);
gsi_insert_seq_after (&gsi, stmts, GSI_LAST_NEW_STMT);
gcond *stmt_cond = gimple_build_cond (NE_EXPR, tmp, boolean_false_node,
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 14610499e5fb..6468995eb316 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -920,11 +920,9 @@ public:
bool m_exp_index_transform_applied;
/* If switch conversion decided exponential index transform is viable, here
- will be stored the types to which index variable has to be converted
- before the logarithm and the "is power of 2" operations which are part of
- the transform. */
+ will be stored the type to which index variable has to be converted
+ before the logarithm operation which is a part of the transform. */
tree m_exp_index_transform_log2_type;
- tree m_exp_index_transform_pow2p_type;
};
void
--
2.46.2
|