WebKit Bugzilla
Attachment 372881 Details for
Bug 199203
: Add B3 Strength Reduction to turn multiply-immediate to adds and shifts
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch
bug-199203-20190625171348.patch (text/plain), 7.43 KB, created by
Justin Michaud
on 2019-06-25 17:13:48 PDT
(
hide
)
Description:
Patch
Filename:
MIME Type:
Creator:
Justin Michaud
Created:
2019-06-25 17:13:48 PDT
Size:
7.43 KB
patch
obsolete
>Subversion Revision: 246642 >diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog >index 10940f37a1f8fe59ad14b87261354c6ac9601853..ab860e5a77091a1f20118b60ac4e810adaff2a70 100644 >--- a/Source/JavaScriptCore/ChangeLog >+++ b/Source/JavaScriptCore/ChangeLog >@@ -1,3 +1,17 @@ >+2019-06-25 Justin Michaud <justin_michaud@apple.com> >+ >+ Add B3 Strength Reduction to turn multiply-immediate to adds and shifts >+ https://bugs.webkit.org/show_bug.cgi?id=199203 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ Turn multiplications by an immediate into shifts and adds in some cases. For example, >+ x*60 becomes x*(64-4) = x<<6 - x<<2, and x*68 becomes x<<6 + x<<2. We do this optimization >+ only on arm64, for immediates that would cause at most two shifts. This was the only case >+ that did not show a regression on the new microbenchmark. >+ >+ * b3/B3ReduceStrength.cpp: >+ > 2019-06-18 Darin Adler <darin@apple.com> > > Tidy up the remaining bits of the AtomicString to AtomString rename >diff --git a/Source/JavaScriptCore/b3/B3ReduceStrength.cpp b/Source/JavaScriptCore/b3/B3ReduceStrength.cpp >index 102478f38045e9688fe89cc330655db4e346a918..b902a6b1ec29846f73874f08d12d49c4a5ed59aa 100644 >--- a/Source/JavaScriptCore/b3/B3ReduceStrength.cpp >+++ b/Source/JavaScriptCore/b3/B3ReduceStrength.cpp >@@ -732,16 +732,69 @@ private: > replaceWithNew<Value>(Neg, m_value->origin(), m_value->child(0)); > break; > } >- >+ > // Turn this: Mul(value, constant) >- // Into this: Shl(value, log2(constant)) >- if (hasOneBitSet(factor)) { >- unsigned shiftAmount = WTF::fastLog2(static_cast<uint64_t>(factor)); >- replaceWithNewValue( >- m_proc.add<Value>( >- Shl, m_value->origin(), m_value->child(0), >+ // Into this: Add/Sub(Shl(value, log2(constant1)), ...) >+ bool isNegative = factor < 0; >+ if (isNegative) >+ factor *= -1; >+#if CPU(ARM64) >+ constexpr unsigned cutoff = 2; >+#else >+ constexpr unsigned cutoff = 1; >+#endif >+ >+ unsigned bitCount = bitCount64(factor); >+ int64_t invertedBase = factor; >+ // Set all bits to the right of the msb >+ invertedBase |= invertedBase >> 32; >+ invertedBase |= invertedBase >> 16; >+ invertedBase |= invertedBase >> 8; >+ invertedBase |= invertedBase >> 4; >+ invertedBase |= invertedBase >> 2; >+ invertedBase |= invertedBase >> 1; >+ // No sign bit, so unsigned overflow should not happen >+ invertedBase += 1; >+ >+ const int64_t invertedFactor = ((~factor) & (invertedBase - 1)) + 1; >+ ASSERT(invertedFactor > 0); >+ const unsigned invertedBitCount = bitCount64(invertedFactor) + 1; >+ ASSERT(invertedBase - invertedFactor == factor); >+ >+ if ((bitCount > 0 && bitCount <= cutoff) || (invertedBitCount > 0 && invertedBitCount <= cutoff)) { >+ Value* add = nullptr; >+ const int64_t value = bitCount <= invertedBitCount ? factor : invertedFactor; >+ >+ for (unsigned shiftAmount = 0; shiftAmount < 63; ++shiftAmount) { >+ if (!(value&(1ul << shiftAmount))) >+ continue; >+ >+ Value* thisBit = shiftAmount? m_insertionSet.insert<Value>( >+ m_index, Shl, m_value->origin(), m_value->child(0), >+ m_insertionSet.insert<Const32Value>( >+ m_index, m_value->origin(), shiftAmount)) : m_value->child(0); >+ >+ if (!add) >+ add = thisBit; >+ else >+ add = m_insertionSet.insert<Value>(m_index, Add, m_value->origin(), thisBit, add); >+ } >+ >+ if (value == invertedFactor) { >+ unsigned shiftAmount = WTF::fastLog2(static_cast<uint64_t>(invertedBase)); >+ >+ Value* thisBit = m_insertionSet.insert<Value>( >+ m_index, Shl, m_value->origin(), m_value->child(0), > m_insertionSet.insert<Const32Value>( >- m_index, m_value->origin(), shiftAmount))); >+ m_index, m_value->origin(), shiftAmount)); >+ >+ add = m_insertionSet.insert<Value>(m_index, Sub, m_value->origin(), thisBit, add); >+ } >+ >+ if (isNegative) >+ add = m_insertionSet.insert<Value>(m_index, Neg, m_value->origin(), add); >+ >+ replaceWithIdentity(add); > break; > } > } else if (m_value->child(1)->hasDouble()) { >@@ -2176,6 +2229,16 @@ private: > } > } > >+ inline static unsigned bitCount64(uint64_t value) >+ { >+ unsigned bitCount = 0; >+ for (unsigned shiftAmount = 0; shiftAmount < 63; ++shiftAmount) { >+ if (value&(1ul << shiftAmount)) >+ ++bitCount; >+ } >+ return bitCount; >+ } >+ > // Find a node that: > // - functor(node) returns true. > // - it's reachable from the given node via children. >diff --git a/Source/JavaScriptCore/b3/testb3.cpp b/Source/JavaScriptCore/b3/testb3.cpp >index 7f31679b413fcc4d47b5615e1904a33dd8917855..57ae65eb46842719ebe015e5d8e449c26112289f 100644 >--- a/Source/JavaScriptCore/b3/testb3.cpp >+++ b/Source/JavaScriptCore/b3/testb3.cpp >@@ -17261,6 +17261,24 @@ void run(const char* filter) > RUN(testMulArgImm(7, 16)); > RUN(testMulArgImm(7, 0x80000000llu)); > RUN(testMulArgImm(7, 0x800000000000llu)); >+ RUN(testMulArgImm(0x800000000000llu, 7)); >+ RUN(testMulArgImm(0x800000000000llu, 7)); >+ RUN(testMulArgImm(0x80000000u, 0x80000000u)); >+ RUN(testMulArgImm(0x80000000u, 0x80000001u)); >+ RUN(testMulArgImm(0x81111111111111u, 2)); >+ RUN(testMulArgImm(2, 0x81111111111111u)); >+ RUN(testMulArgImm(0x00000000FFFFFFFF, 0x00000000FFFFFFFF)); >+ RUN(testMulArgImm(0x00000000FFFFFFFF, -0x00000000FFFFFFFF)); >+ RUN(testMulArgImm(-0x00000000FFFFFFFF, 0x00000000FFFFFFFF)); >+ RUN(testMulArgImm(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF)); >+ RUN(testMulArgImm(0xFFFFFFFFFFFFFFF1, 2)); >+ RUN(testMulArgImm(2, 0xFFFFFFFFFFFFFFF1)); >+ RUN(testMulArgImm(65, 20)); >+ RUN(testMulArgImm(20, 65)); >+ RUN(testMulArgImm(-65, 20)); >+ RUN(testMulArgImm(65, -20)); >+ RUN(testMulArgImm(63, 20)); >+ RUN(testMulArgImm(20, 63)); > RUN(testMulArgImm(-42, 2)); > RUN(testMulArgImm(-42, 4)); > RUN(testMulArgImm(-42, 8)); >diff --git a/JSTests/microbenchmarks/multiply-immediate.js b/JSTests/microbenchmarks/multiply-immediate.js >new file mode 100644 >index 0000000000000000000000000000000000000000..32a945217920b18231aa2778a59734c02893c221 >--- /dev/null >+++ b/JSTests/microbenchmarks/multiply-immediate.js >@@ -0,0 +1,13 @@ >+function doTest(max) { >+ let sum = 0 >+ for (let i=0; i<max; ++i) { >+ sum = (sum|0) + ((i*256)|0) - ((i*9)|0) - ((i*31)|0) - ((i*67)|0) - ((i*64)|0) >+ } >+ return sum >+} >+noInline(doTest); >+ >+for (let i=0; i<100000; ++i) doTest(10000) >+ >+if (doTest(1000) != 42457500) >+ throw "Error: bad result: " + doTest(1000);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Formatted Diff
|
Diff
Attachments on
bug 199203
:
372861
|
372880
|
372881
|
373928