Title: Performance improvements Author: rodarima Created: Sun, 13 Apr 2025 16:52:07 +0000 State: open Rendering https://html.spec.whatwg.org/ several times (via refresh) leads to the following perf trace on armv7: ``` # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 169K of event 'cycles:Pu' # Event count (approx.): 33718562150 # # Overhead Command Shared Object Symbol # ........ ....... ....................... ................................................................................................................................................ # 7.03% dillo dillo [.] lout::misc::NotSoSimpleVector::getRef(int) const 6.34% dillo ld-musl-armhf.so.1 [.] __strchrnul 2.69% dillo dillo [.] Html_write_raw(DilloHtml*, char*, int, int) 2.22% dillo ld-musl-armhf.so.1 [.] __libc_malloc_impl 1.91% dillo ld-musl-armhf.so.1 [.] strncmp 1.86% dillo dillo [.] lout::misc::NotSoSimpleVector::size() const 1.78% dillo dillo [.] dw::Textblock::accumulateWordData(int) 1.64% dillo libz.so.1.3.1 [.] crc32_z 1.51% dillo ld-musl-armhf.so.1 [.] get_meta 1.45% dillo dillo [.] CssStyleSheet::apply(CssPropertyList*, Doctree*, DoctreeNode const*, MatchCache*) const 1.37% dillo dillo [.] int lout::misc::max(int, int) 1.29% dillo dillo [.] dw::Textblock::BadnessAndPenalty::penaltyValue(int, int) 1.29% dillo ld-musl-armhf.so.1 [.] strcspn 1.22% dillo ld-musl-armhf.so.1 [.] __libc_free 1.20% dillo dillo [.] dw::Textblock::BadnessAndPenalty::badnessValue(int) 1.17% dillo dillo [.] lout::identity::IdentifiableObject::instanceOf(int) 1.16% dillo dillo [.] dw::Textblock::addText(char const*, unsigned int, dw::core::style::Style*) 1.16% dillo dillo [.] dw::Textblock::wrapWordInFlow(int, bool) 1.09% dillo dillo [.] a_Html_get_attr(DilloHtml*, char const*, int, char const*) 1.03% dillo dillo [.] dw::Textblock::getWidgetRegardingBorderForLine(int, int) 1.00% dillo dillo [.] 
CssSelector::match(Doctree*, DoctreeNode const*, int, CssSelector::Combinator, MatchCache*) 0.92% dillo ld-musl-armhf.so.1 [.] cached_aligned32 0.89% dillo dillo [.] dw::Textblock::getLineStretchability(int) 0.86% dillo ld-musl-armhf.so.1 [.] strlen 0.85% dillo dillo [.] dw::core::Widget::getStyle() 0.85% dillo dillo [.] dw::Textblock::calcBorders(int, int) 0.83% dillo dillo [.] dw::Textblock::handleWordExtremes(int) 0.80% dillo ld-musl-armhf.so.1 [.] alloc_slot 0.79% dillo dillo [.] dw::Textblock::BadnessAndPenalty::compareTo(int, dw::Textblock::BadnessAndPenalty*) 0.78% dillo dillo [.] lout::misc::SimpleVector::size() const 0.77% dillo dillo [.] lout::container::untyped::Vector::get(int) const 0.77% dillo dillo [.] lout::object::ConstString::hashValue(char const*) 0.66% dillo libgcc_s.so.1 [.] __aeabi_idiv 0.66% dillo dillo [.] CssSimpleSelector::match(DoctreeNode const*) 0.64% dillo dillo [.] lout::container::untyped::HashSet::findNode(lout::object::Object*) const 0.63% dillo dillo [.] lout::misc::SimpleVector::getRef(int) const ``` There is a bottleneck in getRef of the NotSoSimpleVector. We could probably optimize the hot path, as it performs several checks that are not really needed, including two asserts. ``` │ 0005f7fc ::getRef(int) const>: ▒ │ /** ▒ │ * \brief Return the reference of one element. 
▒ │ * ▒ │ * \sa misc::SimpleVector::get ▒ │ */ ▒ │ inline T* getRef (int i) const ▒ 2.72 │ push {r7, lr} ▒ 1.83 │ sub sp, #8 ▒ 2.42 │ add r7, sp, #0 ▒ 5.87 │ str r0, [r7, #4] ▒ 1.99 │ str r1, [r7, #0] ▒ │ { ▒ │ if (this->startExtra == -1) { ▒ 2.01 │ ldr r3, [r7, #4] ▒ 5.73 │ ldr r3, [r3, #28] ▒ 3.61 │ cmp.w r3, #4294967295 ▒ │ bne.n 5f84e ::getRef(int) const+0x52> ▒ │ assert (i >= 0 && i < this->numMain); ▒ 5.79 │ ldr r3, [r7, #0] ▒ 3.48 │ cmp r3, #0 ▒ 0.34 │ blt.n 5f820 ::getRef(int) const+0x24> ▒ 5.49 │ ldr r3, [r7, #4] ▒ 5.47 │ ldr r3, [r3, #12] ▒ 1.67 │ ldr r2, [r7, #0] ▒ 3.40 │ cmp r2, r3 ▒ │ blt.n 5f834 ::getRef(int) const+0x38> ▒ │ ldr r3, [pc, #228] @ (5f908 ::getRef(int) const+0x10c>) ▒ │ add r3, pc ▒ │ movw r2, #479 @ 0x1df ▒ │ ldr r1, [pc, #224] @ (5f90c ::getRef(int) const+0x110>) ▒ │ add r1, pc ▒ │ ldr r0, [pc, #224] @ (5f910 ::getRef(int) const+0x114>) ▒ │ add r0, pc ▒ │ → blx strrchr@plt ▒ │ return this->arrayMain + i; ▒ 5.98 │ ldr r3, [r7, #4] ▒ 5.65 │ ldr r1, [r3, #0] ▒ 1.80 │ ldr r2, [r7, #0] ▒ 3.96 │ mov r3, r2 ▒ 4.10 │ lsls r3, r3, #1 ▒ 1.86 │ add r3, r2 ▒ 3.54 │ lsls r2, r3, #3 ▒ 1.68 │ subs r2, r2, r3 ▒ 3.85 │ lsls r3, r2, #2 ▒ 2.06 │ mov r2, r3 ▒ 1.84 │ mov r3, r2 ▒ 2.02 │ add r3, r1 ▒ │ b.n 5f8fe ::getRef(int) const+0x102> ▒ │ } else { ▒ │ if (i < this->startExtra) { ▒ │ ldr r3, [r7, #4] ▒ │ ldr r3, [r3, #28] ▒ │ ldr r2, [r7, #0] ▒ │ cmp r2, r3 ▒ │ bge.n 5f88c ::getRef(int) const+0x90> ▒ │ assert (i >= 0); ▒ │ ldr r3, [r7, #0] ▒ │ cmp r3, #0 ▒ │ bge.n 5f872 ::getRef(int) const+0x76> ▒ │ ldr r3, [pc, #180] @ (5f914 ::getRef(int) const+0x118>) ▒ │ add r3, pc ▒ │ movw r2, #483 @ 0x1e3 ▒ │ ldr r1, [pc, #176] @ (5f918 ::getRef(int) const+0x11c>) ▒ │ add r1, pc ▒ │ ldr r0, [pc, #176] @ (5f91c ::getRef(int) const+0x120>) ▒ │ add r0, pc ▒ │ → blx strrchr@plt ... ```