WebKit Bugzilla
Attachment 373149 Details for
Bug 199251
: Add b3 macro lowering for CheckMul on arm64
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch
bug-199251-20190628141246.patch (text/plain), 17.16 KB, created by
Justin Michaud
on 2019-06-28 14:12:47 PDT
(
hide
)
Description:
Patch
Filename:
MIME Type:
Creator:
Justin Michaud
Created:
2019-06-28 14:12:47 PDT
Size:
17.16 KB
patch
obsolete
>Subversion Revision: 246928 >diff --git a/Source/JavaScriptCore/ChangeLog b/Source/JavaScriptCore/ChangeLog >index a776bcda24ab5658b70e657b298302538895c5df..41d1981ad746dc8ff6631dbe7b09cb91b3d871f5 100644 >--- a/Source/JavaScriptCore/ChangeLog >+++ b/Source/JavaScriptCore/ChangeLog >@@ -1,3 +1,35 @@ >+2019-06-28 Justin Michaud <justin_michaud@apple.com> >+ >+ Add b3 macro lowering for CheckMul on arm64 >+ https://bugs.webkit.org/show_bug.cgi?id=199251 >+ >+ Reviewed by Saam Barati. >+ >+ - Lower CheckMul for 33-bit arguments on arm64 into a mul and then an overflow check. >+ - Add a new opcode to air on arm64 for smull (multiplySignExtend32). >+ - Fuse sign extend 32 + mul into smull (taking two 32-bit arguments and producing 64 bits). >+ - 1.25x speedup on power of two microbenchmark, 1.15x speedup on normal constant microbenchmark, >+ and no change on the no-constant benchmark. >+ Also, skip some of the b3 tests that were failing before this patch so that the new tests can run >+ to completion. >+ >+ * assembler/MacroAssemblerARM64.h: >+ (JSC::MacroAssemblerARM64::multiplySignExtend32): >+ * assembler/testmasm.cpp: >+ (JSC::testMul32SignExtend): >+ (JSC::run): >+ * b3/B3LowerMacros.cpp: >+ * b3/B3LowerToAir.cpp: >+ * b3/air/AirOpcode.opcodes: >+ * b3/testb3.cpp: >+ (JSC::B3::testMulArgs32SignExtend): >+ (JSC::B3::testMulImm32SignExtend): >+ (JSC::B3::testMemoryFence): >+ (JSC::B3::testStoreFence): >+ (JSC::B3::testLoadFence): >+ (JSC::B3::testPinRegisters): >+ (JSC::B3::run): >+ > 2019-06-28 Konstantin Tokarev <annulen@yandex.ru> > > Remove traces of ENABLE_ICONDATABASE remaining after its removal in 219733 >diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h >index 3f4c16d60f89b23f32cc6cb7bdd881ff01bf0af0..2ea911e6c9db5ba953df96942935dc8ec1f64689 100644 >--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h >+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h >@@ -569,6 +569,11 @@ public: > m_assembler.msub<64>(dest, mulLeft, mulRight, ARM64Registers::zr); > } > >+ void multiplySignExtend32(RegisterID left, RegisterID right, RegisterID dest) >+ { >+ m_assembler.smull(dest, left, right); >+ } >+ > void div32(RegisterID dividend, RegisterID divisor, RegisterID dest) > { > m_assembler.sdiv<32>(dest, dividend, divisor); >diff --git a/Source/JavaScriptCore/assembler/testmasm.cpp b/Source/JavaScriptCore/assembler/testmasm.cpp >index 1050b39983e4e65052824c04f0bb1254a313e2c8..7c4fe3258dd38c20896adc80bee6b34784e2ea0f 100644 >--- a/Source/JavaScriptCore/assembler/testmasm.cpp >+++ b/Source/JavaScriptCore/assembler/testmasm.cpp >@@ -351,6 +351,25 @@ void testMul32WithImmediates() > } > } > >+#if CPU(ARM64) >+void testMul32SignExtend() >+{ >+ for (auto value : int32Operands()) { >+ auto mul = compile([=] (CCallHelpers& jit) { >+ jit.emitFunctionPrologue(); >+ >+ jit.multiplySignExtend32(GPRInfo::argumentGPR0, GPRInfo::argumentGPR1, GPRInfo::returnValueGPR); >+ >+ jit.emitFunctionEpilogue(); >+ jit.ret(); >+ }); >+ >+ for (auto value2 : int32Operands()) >+ CHECK_EQ(invoke<long int>(mul, value, value2), ((long int) value) * ((long int) value2)); >+ } >+} >+#endif >+ > #if CPU(X86) || CPU(X86_64) || CPU(ARM64) > void testCompareFloat(MacroAssembler::DoubleCondition condition) > { >@@ -1110,6 +1129,10 @@ void run(const char* filter) > RUN(testCompareDouble(MacroAssembler::DoubleLessThanOrEqualOrUnordered)); > RUN(testMul32WithImmediates()); > >+#if CPU(ARM64) >+ RUN(testMul32SignExtend()); >+#endif >+ > #if CPU(X86) || CPU(X86_64) || CPU(ARM64) > RUN(testCompareFloat(MacroAssembler::DoubleEqual)); > RUN(testCompareFloat(MacroAssembler::DoubleNotEqual)); >diff --git a/Source/JavaScriptCore/b3/B3LowerMacros.cpp b/Source/JavaScriptCore/b3/B3LowerMacros.cpp >index 4c594d24b33bb2e41e29ecdbe5b7052709096a13..4384716f1c72a6cb390636d2c40918a7459985be 100644 >--- a/Source/JavaScriptCore/b3/B3LowerMacros.cpp >+++ b/Source/JavaScriptCore/b3/B3LowerMacros.cpp >@@ -177,6 +177,34 @@ private: > break; > } > >+ case CheckMul: { >+ if (isARM64() && m_value->child(0)->type() == Int32) { >+ CheckValue* checkMul = m_value->as<CheckValue>(); >+ >+ Value* left = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(0)); >+ Value* right = m_insertionSet.insert<Value>(m_index, SExt32, m_origin, m_value->child(1)); >+ Value* mulResult = m_insertionSet.insert<Value>(m_index, Mul, m_origin, left, right); >+ Value* mulResult32 = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, mulResult); >+ Value* upperResult = m_insertionSet.insert<Value>(m_index, Trunc, m_origin, >+ m_insertionSet.insert<Value>(m_index, SShr, m_origin, mulResult, m_insertionSet.insert<Const32Value>(m_index, m_origin, 32))); >+ Value* signBit = m_insertionSet.insert<Value>(m_index, SShr, m_origin, >+ mulResult32, >+ m_insertionSet.insert<Const32Value>(m_index, m_origin, 31)); >+ Value* hasOverflowed = m_insertionSet.insert<Value>(m_index, NotEqual, m_origin, upperResult, signBit); >+ >+ CheckValue* check = m_insertionSet.insert<CheckValue>(m_index, Check, m_origin, hasOverflowed); >+ check->setGenerator(checkMul->generator()); >+ check->clobberEarly(checkMul->earlyClobbered()); >+ check->clobberLate(checkMul->lateClobbered()); >+ check->append(checkMul->constrainedChild(0)); >+ check->append(checkMul->constrainedChild(1)); >+ >+ m_value->replaceWithIdentity(mulResult32); >+ m_changed = true; >+ } >+ break; >+ } >+ > case Switch: { > SwitchValue* switchValue = m_value->as<SwitchValue>(); > Vector<SwitchCase> cases; >diff --git a/Source/JavaScriptCore/b3/B3LowerToAir.cpp b/Source/JavaScriptCore/b3/B3LowerToAir.cpp >index b4098bcdd19796a3fb213196d8a03ea92c64cf21..1b92e5ad44006a652e9fb15db63ead0ba64cd671 100644 >--- a/Source/JavaScriptCore/b3/B3LowerToAir.cpp >+++ b/Source/JavaScriptCore/b3/B3LowerToAir.cpp >@@ -2602,6 +2602,27 @@ private: > } > > case Mul: { >+ if (m_value->type() == Int64 >+ && isValidForm(MultiplySignExtend32, Arg::Tmp, Arg::Tmp, Arg::Tmp) >+ && m_value->child(0)->opcode() == SExt32 >+ && !m_locked.contains(m_value->child(0))) { >+ Value* opLeft = m_value->child(0); >+ Value* left = opLeft->child(0); >+ Value* opRight = m_value->child(1); >+ Value* right = nullptr; >+ >+ if (opRight->opcode() == SExt32 && !m_locked.contains(opRight->child(0))) { >+ right = opRight->child(0); >+ } else if (m_value->child(1)->isRepresentableAs<int32_t>()) { >+ // We just use the 64-bit const int as a 32 bit const int directly >+ right = opRight; >+ } >+ >+ if (right) { >+ append(MultiplySignExtend32, tmp(left), tmp(right), tmp(m_value)); >+ return; >+ } >+ } > appendBinOp<Mul32, Mul64, MulDouble, MulFloat, Commutative>( > m_value->child(0), m_value->child(1)); > return; >diff --git a/Source/JavaScriptCore/b3/air/AirOpcode.opcodes b/Source/JavaScriptCore/b3/air/AirOpcode.opcodes >index 11ada411d64380592133bb24f36baae57d2efb74..cb7d63797b4d1a070a2f8332ba8086c19993408c 100644 >--- a/Source/JavaScriptCore/b3/air/AirOpcode.opcodes >+++ b/Source/JavaScriptCore/b3/air/AirOpcode.opcodes >@@ -261,6 +261,9 @@ arm64: MultiplyNeg32 U:G:32, U:G:32, ZD:G:32 > arm64: MultiplyNeg64 U:G:64, U:G:64, ZD:G:64 > Tmp, Tmp, Tmp > >+arm64: MultiplySignExtend32 U:G:32, U:G:32, ZD:G:64 >+ Tmp, Tmp, Tmp >+ > arm64: Div32 U:G:32, U:G:32, ZD:G:32 > Tmp, Tmp, Tmp > >diff --git a/Source/JavaScriptCore/b3/testb3.cpp b/Source/JavaScriptCore/b3/testb3.cpp >index 7f31679b413fcc4d47b5615e1904a33dd8917855..c9fd18e9baf2140c5c03626fbe61f694973a6acd 100644 >--- a/Source/JavaScriptCore/b3/testb3.cpp >+++ b/Source/JavaScriptCore/b3/testb3.cpp >@@ -1189,6 +1189,47 @@ void testMulArgs32(int a, int b) > CHECK(compileAndRun<int>(proc, a, b) == a * b); > } > >+void testMulArgs32SignExtend(int a, int b) >+{ >+ Procedure proc; >+ if (proc.optLevel() < 1) >+ return; >+ BasicBlock* root = proc.addBlock(); >+ Value* arg1 = root->appendNew<Value>( >+ proc, Trunc, Origin(), >+ root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0)); >+ Value* arg2 = root->appendNew<Value>( >+ proc, Trunc, Origin(), >+ root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1)); >+ Value* arg164 = root->appendNew<Value>(proc, SExt32, Origin(), arg1); >+ Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2); >+ Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg164, arg264); >+ root->appendNewControlValue(proc, Return, Origin(), mul); >+ >+ auto code = compileProc(proc); >+ >+ CHECK(invoke<long int>(*code, a, b) == ((long int) a) * ((long int) b)); >+} >+ >+void testMulImm32SignExtend(const int a, int b) >+{ >+ Procedure proc; >+ if (proc.optLevel() < 1) >+ return; >+ BasicBlock* root = proc.addBlock(); >+ Value* arg1 = root->appendNew<Const64Value>(proc, Origin(), a); >+ Value* arg2 = root->appendNew<Value>( >+ proc, Trunc, Origin(), >+ root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1)); >+ Value* arg264 = root->appendNew<Value>(proc, SExt32, Origin(), arg2); >+ Value* mul = root->appendNew<Value>(proc, Mul, Origin(), arg1, arg264); >+ root->appendNewControlValue(proc, Return, Origin(), mul); >+ >+ auto code = compileProc(proc); >+ >+ CHECK(invoke<long int>(*code, b) == ((long int) a) * ((long int) b)); >+} >+ > void testMulLoadTwice() > { > auto test = [&] () { >@@ -14635,55 +14676,55 @@ void testPatchpointTerminalReturnValue(bool successIsRare) > void testMemoryFence() > { > Procedure proc; >- >+ > BasicBlock* root = proc.addBlock(); >- >+ > root->appendNew<FenceValue>(proc, Origin()); > root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); >- >+ > auto code = compileProc(proc); > CHECK_EQ(invoke<int>(*code), 42); > if (isX86()) > checkUsesInstruction(*code, "lock or $0x0, (%rsp)"); > if (isARM64()) >- checkUsesInstruction(*code, "dmb ish"); >+ checkUsesInstruction(*code, "dmb ish"); > checkDoesNotUseInstruction(*code, "mfence"); >- checkDoesNotUseInstruction(*code, "dmb ishst"); >+ checkDoesNotUseInstruction(*code, "dmb ishst"); > } > > void testStoreFence() > { > Procedure proc; >- >+ > BasicBlock* root = proc.addBlock(); >- >+ > root->appendNew<FenceValue>(proc, Origin(), HeapRange::top(), HeapRange()); > root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); >- >+ > auto code = compileProc(proc); > CHECK_EQ(invoke<int>(*code), 42); > checkDoesNotUseInstruction(*code, "lock"); > checkDoesNotUseInstruction(*code, "mfence"); > if (isARM64()) >- checkUsesInstruction(*code, "dmb ishst"); >+ checkUsesInstruction(*code, "dmb ishst"); > } > > void testLoadFence() > { > Procedure proc; >- >+ > BasicBlock* root = proc.addBlock(); >- >+ > root->appendNew<FenceValue>(proc, Origin(), HeapRange(), HeapRange::top()); > root->appendNew<Value>(proc, Return, Origin(), root->appendIntConstant(proc, Origin(), Int32, 42)); >- >+ > auto code = compileProc(proc); > CHECK_EQ(invoke<int>(*code), 42); > checkDoesNotUseInstruction(*code, "lock"); > checkDoesNotUseInstruction(*code, "mfence"); > if (isARM64()) >- checkUsesInstruction(*code, "dmb ish"); >- checkDoesNotUseInstruction(*code, "dmb ishst"); >+ checkUsesInstruction(*code, "dmb ish"); >+ checkDoesNotUseInstruction(*code, "dmb ishst"); > } > > void testTrappingLoad() >@@ -14961,7 +15002,7 @@ void testPinRegisters() > usesCSRs |= csrs.get(regAtOffset.reg()); > CHECK_EQ(usesCSRs, !pin); > }; >- >+ > go(true); > go(false); > } >@@ -17157,6 +17198,29 @@ void run(const char* filter) > Deque<RefPtr<SharedTask<void()>>> tasks; > > auto shouldRun = [&] (const char* testName) -> bool { >+ // FIXME: These tests fail <https://bugs.webkit.org/show_bug.cgi?id=199330>. >+ if (!filter && isARM64()) { >+ for (auto& failingTest : { >+ "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead", >+ "testNegFloatWithUselessDoubleConversion", >+ "testPinRegisters", >+ }) { >+ if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) { >+ dataLogLn("*** Warning: Skipping known-bad test: ", testName); >+ return false; >+ } >+ } >+ } >+ if (!filter && isX86()) { >+ for (auto& failingTest : { >+ "testReportUsedRegistersLateUseFollowedByEarlyDefDoesNotMarkUseAsDead", >+ }) { >+ if (WTF::findIgnoringASCIICaseWithoutLength(testName, failingTest) != WTF::notFound) { >+ dataLogLn("*** Warning: Skipping known-bad test: ", testName); >+ return false; >+ } >+ } >+ } > return !filter || WTF::findIgnoringASCIICaseWithoutLength(testName, filter) != WTF::notFound; > }; > >@@ -17277,8 +17341,21 @@ void run(const char* filter) > RUN(testMulImmArg(0, 2)); > RUN(testMulImmArg(1, 0)); > RUN(testMulImmArg(3, 3)); >+ RUN(testMulImm32SignExtend(1, 2)); >+ RUN(testMulImm32SignExtend(0, 2)); >+ RUN(testMulImm32SignExtend(1, 0)); >+ RUN(testMulImm32SignExtend(3, 3)); >+ RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFF)); >+ RUN(testMulImm32SignExtend(0xFFFFFFFE, 0xFFFFFFFF)); >+ RUN(testMulImm32SignExtend(0xFFFFFFFF, 0xFFFFFFFE)); > RUN(testMulArgs32(1, 1)); > RUN(testMulArgs32(1, 2)); >+ RUN(testMulArgs32(0xFFFFFFFF, 0xFFFFFFFF)); >+ RUN(testMulArgs32(0xFFFFFFFE, 0xFFFFFFFF)); >+ RUN(testMulArgs32SignExtend(1, 1)); >+ RUN(testMulArgs32SignExtend(1, 2)); >+ RUN(testMulArgs32SignExtend(0xFFFFFFFF, 0xFFFFFFFF)); >+ RUN(testMulArgs32SignExtend(0xFFFFFFFE, 0xFFFFFFFF)); > RUN(testMulLoadTwice()); > RUN(testMulAddArgsLeft()); > RUN(testMulAddArgsRight()); >diff --git a/JSTests/ChangeLog b/JSTests/ChangeLog >index e8f283874f88fa31d0ca1a6f1c90f6e3f9e06e4c..4fd96fdd1c0530a8dee9b5200128fe47dd08ed3b 100644 >--- a/JSTests/ChangeLog >+++ b/JSTests/ChangeLog >@@ -1,3 +1,17 @@ >+2019-06-28 Justin Michaud <justin_michaud@apple.com> >+ >+ Add b3 macro lowering for CheckMul on arm64 >+ https://bugs.webkit.org/show_bug.cgi?id=199251 >+ >+ Reviewed by Saam Barati. >+ >+ * microbenchmarks/check-mul-constant.js: Added. >+ (doTest): >+ * microbenchmarks/check-mul-no-constant.js: Added. >+ (doTest): >+ * microbenchmarks/check-mul-power-of-two.js: Added. >+ (doTest): >+ > 2019-06-26 Keith Miller <keith_miller@apple.com> > > speciesConstruct needs to throw if the result is a DataView >diff --git a/JSTests/microbenchmarks/check-mul-constant.js b/JSTests/microbenchmarks/check-mul-constant.js >new file mode 100644 >index 0000000000000000000000000000000000000000..e4452ea4053d36b8138e48db0e054eb53dd90176 >--- /dev/null >+++ b/JSTests/microbenchmarks/check-mul-constant.js >@@ -0,0 +1,13 @@ >+function doTest(max) { >+ let sum = 0 >+ for (let i=0; i<max; ++i) { >+ sum = sum + i * 304; >+ } >+ return sum >+} >+noInline(doTest); >+ >+for (let i=0; i<100000; ++i) doTest(10000) >+ >+if (doTest(10000) != 15198480000) >+ throw "Error: bad result: " + doTest(10000); >diff --git a/JSTests/microbenchmarks/check-mul-no-constant.js b/JSTests/microbenchmarks/check-mul-no-constant.js >new file mode 100644 >index 0000000000000000000000000000000000000000..962c2a0531061b63bf1acbfad3e4ca53e4a50814 >--- /dev/null >+++ b/JSTests/microbenchmarks/check-mul-no-constant.js >@@ -0,0 +1,13 @@ >+function doTest(max) { >+ let sum = 0 >+ for (let i=0; i<max; ++i) { >+ sum = sum + i * i; >+ } >+ return sum >+} >+noInline(doTest); >+ >+for (let i=0; i<100000; ++i) doTest(10000) >+ >+if (doTest(10000) != 333283335000) >+ throw "Error: bad result: " + doTest(10000); >diff --git a/JSTests/microbenchmarks/check-mul-power-of-two.js b/JSTests/microbenchmarks/check-mul-power-of-two.js >new file mode 100644 >index 0000000000000000000000000000000000000000..92658838860275514bbba0ff723e407bc936cb81 >--- /dev/null >+++ b/JSTests/microbenchmarks/check-mul-power-of-two.js >@@ -0,0 +1,13 @@ >+function doTest(max) { >+ let sum = 0 >+ for (let i=0; i<max; ++i) { >+ sum = sum + i * 64; >+ } >+ return sum >+} >+noInline(doTest); >+ >+for (let i=0; i<100000; ++i) doTest(10000) >+ >+if (doTest(10000) != 3199680000) >+ throw "Error: bad result: " + doTest(10000);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Formatted Diff
|
Diff
Attachments on
bug 199251
:
372977
|
373149
|
373155
|
373926