If you use a compiled language, you should periodically look at Godbolt and see what your code is doing and what changes to your code will do in the compiled output.

In this case a positively insane way of calculating squares and cubes generates 311 lines of ARM assembler output that will swallow your memory. With even something as simple as -O1 on the command line it’s replaced by one or two multiplications respectively. With -fwhole-program it removes the functions entirely and inlines them into the loop in main().

Know your tools. It makes huge differences!

  • mo_ztt ✅
    link
    English
    4
    1 year ago
    /*
     * Returns num * num * num without writing a multiplication: it declares a
     * num x num x num variable-length array of char and returns its size.
     * sizeof applied to a variable-length array is evaluated at run time, and
     * the size_t result is converted to int by the return statement.
     * NOTE(review): a non-positive num makes the array declaration undefined
     * behavior in C - confirm callers only pass positive values.
     */
    int cube(int num) {
        /* rv is never read or written; only its size matters. */
        char rv[num][num][num];
        return sizeof(rv);
    }
    

    “Doctor, it hurts when I do this.”

    • r2p2
      link
      2
      1 year ago

      Not sure if op is trolling. Seems to be on the same level as sleep sort. (But there, not even compiler optimizations can help.)

      • @ttmrichterOP
        link
        0
        1 year ago

        Not trolling. Just:

        • showing how clever optimizers can get these days
        • introducing a cool web site
        • highlighting the importance of occasionally peeking under the hood to spot gross inefficiencies
        • mo_ztt ✅
          link
          English
          0
          edit-2
          1 year ago

          Eh. Honestly, I think what you’re saying, and the points the article is making, are pretty valid. That’s still gonna be way, way overshadowed by the absolutely ridiculous example they chose to use to make their point. Like “Since you’re writing code that’s ridiculous to such a degree that it wouldn’t even occur to most people that the way you’re doing it would even work, you better turn optimizations on, so the compiler can fix your code back to normalcy behind the scenes for you.”

    • @ttmrichterOP
      link
      0
      1 year ago

      Multiplication hurts? 😲

      @ cube: the whole function reduced to two multiplications.
      @ In:  r0 = num
      @ Out: r0 = num * num * num (low 32 bits)
      @ Writes r3 as scratch; no stack is used.
      cube:
              @ r3 = num * num
              mul     r3, r0, r0
              @ r0 = (num * num) * num
              mul     r0, r3, r0
              bx      lr
      
      • @ttmrichterOP
        link
        0
        1 year ago

        I mean it could hurt:

        @ cube: long-form listing of the same function.
        @ In:  r0 = num
        @ Out: r0 = num * num * num (low 32 bits)
        @ A 112-byte frame is addressed through r7. A further block of
        @ num * num * num bytes (rounded up to a multiple of 8) is carved out of
        @ the stack and released again; nothing is ever loaded from or stored
        @ to that block.
        @ NOTE(review): presumably compiled without optimization - confirm.
        cube:
                @ Prologue: save r4-r10 and fp, reserve the 112-byte frame, r7 = frame base.
                push    {r4, r5, r6, r7, r8, r9, r10, fp}
                sub     sp, sp, #112
                add     r7, sp, #0
                @ Spill num to [r7, #92] and remember the current sp in ip;
                @ ip is copied back into sp near the end of the function.
                str     r0, [r7, #92]
                mov     r3, sp
                mov     ip, r3
                @ Reload num into r1, r0 and r6 - the three factors used below.
                ldr     r1, [r7, #92]
                ldr     r0, [r7, #92]
                ldr     r6, [r7, #92]
                @ Store r1 - 1 at [r7, #108]; this slot is not read again in this function.
                subs    r3, r1, #1
                str     r3, [r7, #108]
                @ Zero-extend r1 into r5:r4 and shift that 64-bit value left by 3
                @ into r3:r2. Both r3 and r2 are overwritten below before being
                @ read, so the shifted value is never used.
                mov     r2, r1
                movs    r3, #0
                mov     r4, r2
                mov     r5, r3
                mov     r2, #0
                mov     r3, #0
                lsls    r3, r5, #3
                orr     r3, r3, r4, lsr #29
                lsls    r2, r4, #3
                @ Store r0 - 1 at [r7, #104]; not read again.
                subs    r3, r0, #1
                str     r3, [r7, #104]
                @ Write r1 and r0, each zero-extended to 64 bits, to [r7, #80] and [r7, #64].
                mov     r2, r1
                movs    r3, #0
                str     r2, [r7, #80]
                str     r3, [r7, #84]
                mov     r2, r0
                movs    r3, #0
                str     r2, [r7, #64]
                str     r3, [r7, #68]
                @ 64-bit multiply of those two values: the cross terms use mul,
                @ the low words use umull, and the sum ends up in r9:r8.
                ldrd    r4, [r7, #80]
                mov     r3, r5
                ldr     r2, [r7, #64]
                mul     r2, r2, r3
                ldr     r3, [r7, #68]
                strd    r4, [r7, #80]
                ldr     r4, [r7, #80]
                mul     r3, r4, r3
                add     r3, r3, r2
                ldr     r2, [r7, #80]
                ldr     r4, [r7, #64]
                umull   r8, r9, r2, r4
                add     r3, r3, r9
                mov     r9, r3
                @ Shift the 64-bit product left by 3 into r3:r2; again both
                @ registers are overwritten below before being read.
                mov     r2, #0
                mov     r3, #0
                lsl     r3, r9, #3
                orr     r3, r3, r8, lsr #29
                lsl     r2, r8, #3
                @ Store r6 - 1 at [r7, #100]; not read again.
                subs    r3, r6, #1
                str     r3, [r7, #100]
                @ Second 64-bit product of r1 and r0 (staged through [r7, #32]
                @ and [r7, #72]); the result is left in fp:r10.
                mov     r2, r1
                movs    r3, #0
                str     r2, [r7, #32]
                str     r3, [r7, #36]
                mov     r2, r0
                movs    r3, #0
                str     r2, [r7, #72]
                str     r3, [r7, #76]
                ldrd    r4, [r7, #32]
                mov     r3, r5
                ldrd    r8, [r7, #72]
                mov     r2, r8
                mul     r2, r2, r3
                strd    r8, [r7, #72]
                ldr     r3, [r7, #76]
                mov     r8, r4
                mov     r9, r5
                mov     r4, r8
                mul     r3, r4, r3
                add     r3, r3, r2
                mov     r2, r8
                ldr     r4, [r7, #72]
                umull   r10, fp, r2, r4
                add     r3, r3, fp
                mov     fp, r3
                @ Multiply fp:r10 by r6 (zero-extended via [r7, #24]); the
                @ 64-bit result is written to [r7, #56] / [r7, #60].
                mov     r2, r6
                movs    r3, #0
                str     r2, [r7, #24]
                str     r3, [r7, #28]
                ldrd    r4, [r7, #24]
                mov     r3, r4
                mul     r2, r3, fp
                mov     r3, r5
                mul     r3, r10, r3
                add     r3, r3, r2
                mov     r2, r4
                umull   r4, r2, r10, r2
                str     r2, [r7, #60]
                mov     r2, r4
                str     r2, [r7, #56]
                ldr     r2, [r7, #60]
                add     r3, r3, r2
                str     r3, [r7, #60]
                @ Reload that result and shift it left by 3 into r3:r2; both
                @ registers are overwritten below before being read.
                mov     r2, #0
                mov     r3, #0
                ldrd    r8, [r7, #56]
                mov     r4, r9
                lsls    r3, r4, #3
                mov     r4, r8
                orr     r3, r3, r4, lsr #29
                mov     r4, r8
                lsls    r2, r4, #3
                @ Third 64-bit product of r1 and r0 (staged through [r7, #16]
                @ and [r7, #8]); the result is written to [r7, #48] / [r7, #52].
                mov     r2, r1
                movs    r3, #0
                str     r2, [r7, #16]
                str     r3, [r7, #20]
                mov     r2, r0
                movs    r3, #0
                str     r2, [r7, #8]
                str     r3, [r7, #12]
                ldrd    r8, [r7, #16]
                mov     r3, r9
                ldrd    r10, [r7, #8]
                mov     r2, r10
                mul     r2, r2, r3
                mov     r3, fp
                mov     r4, r8
                mul     r3, r4, r3
                add     r3, r3, r2
                mov     r2, r8
                mov     r4, r10
                umull   r4, r2, r2, r4
                str     r2, [r7, #52]
                mov     r2, r4
                str     r2, [r7, #48]
                ldr     r2, [r7, #52]
                add     r3, r3, r2
                str     r3, [r7, #52]
                @ Multiply that by r6 (zero-extended via [r7] / [r7, #4]); the
                @ 64-bit result is written to [r7, #40] / [r7, #44].
                mov     r2, r6
                movs    r3, #0
                str     r2, [r7]
                str     r3, [r7, #4]
                ldrd    r8, [r7, #48]
                mov     r3, r9
                ldrd    r10, [r7]
                mov     r2, r10
                mul     r2, r2, r3
                mov     r3, fp
                mov     r4, r8
                mul     r3, r4, r3
                add     r3, r3, r2
                mov     r2, r8
                mov     r4, r10
                umull   r4, r2, r2, r4
                str     r2, [r7, #44]
                mov     r2, r4
                str     r2, [r7, #40]
                ldr     r2, [r7, #44]
                add     r3, r3, r2
                str     r3, [r7, #44]
                @ Reload and shift left by 3 once more; r3 and r2 are
                @ overwritten immediately below, so this value is unused too.
                mov     r2, #0
                mov     r3, #0
                ldrd    r8, [r7, #40]
                mov     r4, r9
                lsls    r3, r4, #3
                mov     r4, r8
                orr     r3, r3, r4, lsr #29
                mov     r4, r8
                lsls    r2, r4, #3
                @ r3 = r1 * r0 * r6 as a plain 32-bit product (num * num * num).
                mov     r3, r1
                mov     r2, r0
                mul     r3, r2, r3
                mov     r2, r6
                mul     r3, r2, r3
                @ Round r3 up to a multiple of 8 and lower sp by that amount:
                @ this is the stack space for the array.
                adds    r3, r3, #7
                lsrs    r3, r3, #3
                lsls    r3, r3, #3
                sub     sp, sp, r3
                @ Record the new sp at [r7, #96]; the slot is not read again.
                mov     r3, sp
                str     r3, [r7, #96]
                @ Recompute r1 * r0 * r6 in r3 - this is the value returned.
                mov     r3, r1
                mov     r2, r0
                mul     r3, r2, r3
                mov     r2, r6
                mul     r3, r2, r3
                @ Restore the sp saved in ip (dropping the array space) and
                @ move the result into r0.
                mov     sp, ip
                mov     r0, r3
                @ Epilogue: release the 112-byte frame, restore saved registers, return.
                adds    r7, r7, #112
                mov     sp, r7
                pop     {r4, r5, r6, r7, r8, r9, r10, fp}
                bx      lr