Input Data Messages (IDM)
Decentralized communication on Ethereum.
Filter by:
31,985 IDM

||FILE:asm/visual/ascii_renderer.s||//==============================================================================
// ASCII Renderer - ARM64 Assembly Implementation
//
// Port of ascii_renderer.c to pure ARM64 assembly
// Functions for bitmap font rendering
//==============================================================================

        .text
        .align 4

//==============================================================================
// Constants
//==============================================================================
ascii_constants:
char_width:     .word 8
char_height:    .word 8
vis_width:      .word 800
vis_height:     .word 600

// Assemble-time copies of the framebuffer geometry used by the renderer.
        .equ    DAC_VIS_WIDTH,  800
        .equ    DAC_VIS_HEIGHT, 600
        .equ    DAC_GLYPH_SIZE, 8           // glyphs are 8x8 pixels

//==============================================================================
// ASCII Font Data - 8x8 bitmap font
// Each character uses 2 uint32 values (64 bits total)
// 256 characters arranged in 16x16 grid
//
// Encoding as consumed by draw_ascii_char_asm: byte k of the 8-byte glyph
// (little-endian, first word = rows 0-3, second word = rows 4-7) is row k,
// and bit 7 of a row byte is the leftmost column.
//
// NOTE(review): all digits share one glyph and all uppercase letters share
// another, and most other entries are blank - this looks like placeholder
// data; confirm against ascii_renderer.c.
//==============================================================================
        .align 4
ascii_font:
        // Character rows 0-1 (chars 0-31, 0x00-0x1f) - blank
        .rept 32
        .word 0x00000000, 0x00000000
        .endr

        // Character row 2 (chars 32-47) - Basic symbols
        .word 0x00000000, 0x00000000    // char 32 ' ' space
        .word 0x00000000, 0x00000000    // char 33 '!'
        .word 0x00000000, 0x00000000    // char 34 '"'
        .word 0x24247e24, 0x0000247e    // char 35 '#' hash
        .word 0x00000000, 0x00000000    // char 36 '$'
        .word 0x00000000, 0x00000000    // char 37 '%'
        .word 0x00000000, 0x00000000    // char 38 '&'
        .word 0x00000000, 0x00000000    // char 39 '\''
        .word 0x00000000, 0x00000000    // char 40 '('
        .word 0x00000000, 0x00000000    // char 41 ')'
        .word 0x7c284400, 0x00004428    // char 42 '*' asterisk
        .word 0x7c101000, 0x00001010    // char 43 '+' plus
        .word 0x00000000, 0x00000000    // char 44 ','
        .word 0x7c000000, 0x00000000    // char 45 '-' minus
        .word 0x00000000, 0x00000000    // char 46 '.'
        .word 0x10080402, 0x00804020    // char 47 '/' slash

        // Character row 3 (chars 48-63) - Numbers and symbols
        .word 0x42424242, 0x007e4242    // char 48 '0'
        .word 0x42424242, 0x007e4242    // char 49 '1'
        .word 0x42424242, 0x007e4242    // char 50 '2'
        .word 0x42424242, 0x007e4242    // char 51 '3'
        .word 0x42424242, 0x007e4242    // char 52 '4'
        .word 0x42424242, 0x007e4242    // char 53 '5'
        .word 0x42424242, 0x007e4242    // char 54 '6'
        .word 0x42424242, 0x007e4242    // char 55 '7'
        .word 0x42424242, 0x007e4242    // char 56 '8'
        .word 0x42424242, 0x007e4242    // char 57 '9'
        .word 0x00000000, 0x00000000    // char 58 ':'
        .word 0x00000000, 0x00000000    // char 59 ';'
        .word 0x20100800, 0x00000810    // char 60 '<' less than
        .word 0x007c0000, 0x0000007c    // char 61 '=' equals
        .word 0x08102000, 0x00002010    // char 62 '>' greater than
        .word 0x00000000, 0x00000000    // char 63 '?'

        // Character row 4 (chars 64-79) - @ and uppercase letters A-O
        .word 0x00000000, 0x00000000    // char 64 '@'
        .word 0x4242427e, 0x007e4242    // char 65 'A'
        .word 0x4242427e, 0x007e4242    // char 66 'B'
        .word 0x4242427e, 0x007e4242    // char 67 'C'
        .word 0x4242427e, 0x007e4242    // char 68 'D'
        .word 0x4242427e, 0x007e4242    // char 69 'E'
        .word 0x4242427e, 0x007e4242    // char 70 'F'
        .word 0x4242427e, 0x007e4242    // char 71 'G'
        .word 0x4242427e, 0x007e4242    // char 72 'H'
        .word 0x4242427e, 0x007e4242    // char 73 'I'
        .word 0x4242427e, 0x007e4242    // char 74 'J'
        .word 0x4242427e, 0x007e4242    // char 75 'K'
        .word 0x4242427e, 0x007e4242    // char 76 'L'
        .word 0x4242427e, 0x007e4242    // char 77 'M'
        .word 0x4242427e, 0x007e4242    // char 78 'N'
        .word 0x4242427e, 0x007e4242    // char 79 'O'

        // Character row 5 (chars 80-95) - Letters P-Z and brackets
        .word 0x4242427e, 0x007e4242    // char 80 'P'
        .word 0x4242427e, 0x007e4242    // char 81 'Q'
        .word 0x4242427e, 0x007e4242    // char 82 'R'
        .word 0x4242427e, 0x007e4242    // char 83 'S'
        .word 0x4242427e, 0x007e4242    // char 84 'T'
        .word 0x4242427e, 0x007e4242    // char 85 'U'
        .word 0x4242427e, 0x007e4242    // char 86 'V'
        .word 0x4242427e, 0x007e4242    // char 87 'W'
        .word 0x4242427e, 0x007e4242    // char 88 'X'
        .word 0x4242427e, 0x007e4242    // char 89 'Y'
        .word 0x4242427e, 0x007e4242    // char 90 'Z'
        .word 0x4040407c, 0x007c4040    // char 91 '[' left bracket
        .word 0x10204080, 0x00020408    // char 92 '\' backslash
        .word 0x0404047c, 0x007c0404    // char 93 ']' right bracket
        .word 0x44281000, 0x00000000    // char 94 '^' caret
        .word 0x00000000, 0x007c0000    // char 95 '_' underscore

        // Character row 6 (chars 96-111) - lowercase and more symbols
        .word 0x00000000, 0x00000000    // char 96 '`'
        .word 0x00000000, 0x00000000    // char 97 'a'
        .word 0x00000000, 0x00000000    // char 98 'b'
        .word 0x00000000, 0x00000000    // char 99 'c'
        .word 0x00000000, 0x00000000    // char 100 'd'
        .word 0x00000000, 0x00000000    // char 101 'e'
        .word 0x00000000, 0x00000000    // char 102 'f'
        .word 0x00000000, 0x00000000    // char 103 'g'
        .word 0x00000000, 0x00000000    // char 104 'h'
        .word 0x00000000, 0x00000000    // char 105 'i'
        .word 0x00000000, 0x00000000    // char 106 'j'
        .word 0x00000000, 0x00000000    // char 107 'k'
        .word 0x00000000, 0x00000000    // char 108 'l'
        .word 0x00000000, 0x00000000    // char 109 'm'
        .word 0x00000000, 0x00000000    // char 110 'n'
        .word 0x00000000, 0x00000000    // char 111 'o'

        // Character row 7 (chars 112-127) - more lowercase and special chars
        .word 0x00000000, 0x00000000    // char 112 'p'
        .word 0x00000000, 0x00000000    // char 113 'q'
        .word 0x00000000, 0x00000000    // char 114 'r'
        .word 0x00000000, 0x00000000    // char 115 's'
        .word 0x00000000, 0x00000000    // char 116 't'
        .word 0x00000000, 0x00000000    // char 117 'u'
        .word 0x00000000, 0x00000000    // char 118 'v'
        .word 0x00000000, 0x00000000    // char 119 'w'
        .word 0x00000000, 0x00000000    // char 120 'x'
        .word 0x00000000, 0x00000000    // char 121 'y'
        .word 0x00000000, 0x00000000    // char 122 'z'
        .word 0x4020201c, 0x001c2020    // char 123 '{' left brace
        .word 0x10101010, 0x00101010    // char 124 '|' pipe
        .word 0x04080870, 0x00700808    // char 125 '}' right brace
        .word 0x004c3200, 0x00000000    // char 126 '~' tilde
        .word 0x00000000, 0x00000000    // char 127

        // Rows 8-15 (chars 128-255) - Extended ASCII filled with blanks
        .rept 128
        .word 0x00000000, 0x00000000
        .endr

//==============================================================================
// void draw_ascii_char_asm(uint32_t *pixels, int x, int y, char c,
//                          uint32_t color, int alpha)
//
// Draw a single 8x8 glyph with its top-left corner at (x, y) into an
// 800x600, 32-bit-per-pixel buffer.
//
// In:    x0 = pixels buffer
//        w1 = x position
//        w2 = y position
//        w3 = character (char c); values outside 0..255 draw nothing
//        w4 = color (uint32_t, 0x00RRGGBB; the top byte is forced to 0xFF)
//        w5 = alpha (int, intended range 0-255); each colour channel is
//             scaled by alpha/255 before writing. alpha == 255 skips the
//             scaling. The destination pixel is overwritten, not blended.
// Out:   none
// Clobb: x1-x15, flags (leaf routine, no stack use)
//
// Nothing is drawn when (x, y) lies outside the buffer. Glyph pixels that
// would fall beyond the right or bottom edge are clipped, so a glyph placed
// near an edge can neither wrap into the next scanline nor write past the end
// of the buffer.
//==============================================================================
        .global _draw_ascii_char_asm
_draw_ascii_char_asm:
        // Character must be 0..255; the unsigned compare also rejects
        // negative values.
        cmp     w3, #255
        b.hi    .Ldac_return

        // Origin must be inside the framebuffer (unsigned compares reject
        // negative coordinates as well).
        mov     w9, #DAC_VIS_WIDTH
        cmp     w1, w9
        b.hs    .Ldac_return
        mov     w10, #DAC_VIS_HEIGHT
        cmp     w2, w10
        b.hs    .Ldac_return

        // Fetch the whole glyph as one 64-bit value: byte k = row k.
        adr     x11, ascii_font
        ldr     x11, [x11, w3, uxtw #3]

        // Fast path: alpha == 255 uses the colour unchanged.
        cmp     w5, #255
        b.eq    .Ldac_alpha_opaque

        // Scale each channel: component = (component * alpha) / 255
        ubfx    w12, w4, #16, #8            // r
        ubfx    w13, w4, #8, #8             // g
        and     w14, w4, #0xFF              // b
        mov     w15, #255
        mul     w12, w12, w5
        udiv    w12, w12, w15
        mul     w13, w13, w5
        udiv    w13, w13, w15
        mul     w14, w14, w5
        udiv    w14, w14, w15

        // Reassemble: 0xFF000000 | (r << 16) | (g << 8) | b
        lsl     w4, w12, #16
        orr     w4, w4, w13, lsl #8
        orr     w4, w4, w14
        orr     w4, w4, #0xFF000000
        b       .Ldac_render_bitmap

.Ldac_alpha_opaque:
        orr     w4, w4, #0xFF000000         // final color = color | 0xFF000000

.Ldac_render_bitmap:
        // Invariants inside the loops:
        //   x11 = remaining glyph rows, current row in the low byte
        //   w4  = final pixel value, w9 = width, w10 = height
        mov     w6, #0                      // row
.Ldac_row_loop:
        add     w7, w2, w6                  // pixel_y = y + row
        cmp     w7, w10
        b.hs    .Ldac_return                // this and all later rows are off-screen

        and     w12, w11, #0xFF             // current row bitmap, bit 7 = column 0
        madd    w13, w7, w9, w1             // pixel_y * width + x
        add     x14, x0, w13, uxtw #2       // address of the row's first pixel

        mov     w8, #0                      // col
.Ldac_col_loop:
        add     w15, w1, w8                 // pixel_x = x + col
        cmp     w15, w9
        b.hs    .Ldac_next_row              // rest of this row is off-screen
        tbz     w12, #7, .Ldac_next_col     // bit clear: leave pixel untouched
        str     w4, [x14, w8, uxtw #2]      // pixels[pixel_y * width + pixel_x] = color
.Ldac_next_col:
        lsl     w12, w12, #1                // bring the next column into bit 7
        add     w8, w8, #1
        cmp     w8, #DAC_GLYPH_SIZE
        b.lo    .Ldac_col_loop

.Ldac_next_row:
        lsr     x11, x11, #8                // bring the next row into the low byte
        add     w6, w6, #1
        cmp     w6, #DAC_GLYPH_SIZE
        b.lo    .Ldac_row_loop

.Ldac_return:
        ret
||ENDFILE||
at txn 0x80ab30462c6914f96a87df4a087ddf700820907a2326c2bc35d5a4e00b8af004 Aug-20-2025 12:43:11 AM UTC (28 days ago)

||FILE:asm/active/noise.s||// AArch64 assembly implementation of noise_block
// void noise_block(rng_t *rng, float *out, uint32_t n)
//   rng: pointer to rng_t { uint64_t state; }
//   out: pointer to float buffer
//   n  : number of samples
// Generates white noise in range [-1,1)

        .text
        .align 2
        .globl _noise_block

// SplitMix64 constants (64-bit)
noise_consts64:
        .quad 0x9E3779B97F4A7C15            // GAMMA, added to the state each step
        .quad 0xBF58476D1CE4E5B9            // MUL1
        .quad 0x94D049BB133111EB            // MUL2

// Float constants
noise_consts32:
        .float 5.9604644775390625e-8        // 1/2^24
        .float 2.0
        .float 1.0

// Register roles inside the loop:
//   x0 = rng*, x1 = out* (post-incremented), w2 = samples still to write
//   x4 = state, x8 = z (mixed value), w10 = 24-bit integer sample
//   x11 = GAMMA, x12 = MUL1, x13 = MUL2
//   s0 = sample, s1 = 1/2^24, s2 = 2.0, s3 = 1.0
_noise_block:
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

        cbz     w2, Lnoise_done             // n == 0: nothing to generate

        // Pull every constant into a register once, ahead of the loop.
        adrp    x5, noise_consts64@PAGE
        add     x5, x5, noise_consts64@PAGEOFF
        ldp     x11, x12, [x5]              // GAMMA, MUL1
        ldr     x13, [x5, #16]              // MUL2

        adrp    x6, noise_consts32@PAGE
        add     x6, x6, noise_consts32@PAGEOFF
        ldp     s1, s2, [x6]                // 1/2^24, 2.0
        ldr     s3, [x6, #8]                // 1.0

Lnoise_loop:
        // --- SplitMix64 step: state += GAMMA, written back immediately ---
        ldr     x4, [x0]
        add     x4, x4, x11
        str     x4, [x0]

        // z = state; z ^= z >> 30; z *= MUL1
        eor     x8, x4, x4, lsr #30
        mul     x8, x8, x12
        // z ^= z >> 27; z *= MUL2
        eor     x8, x8, x8, lsr #27
        mul     x8, x8, x13
        // z ^= z >> 31
        eor     x8, x8, x8, lsr #31

        // Bits 8..31 of z form a 24-bit integer -> float in [0,1)
        ubfx    w10, w8, #8, #24
        ucvtf   s0, w10
        fmul    s0, s0, s1

        // Map [0,1) onto [-1,1): v = u * 2 - 1
        fmul    s0, s0, s2
        fsub    s0, s0, s3

        str     s0, [x1], #4                // *out++ = v

        subs    w2, w2, #1
        b.ne    Lnoise_loop

Lnoise_done:
        ldp     x29, x30, [sp], #16
        ret 
||ENDFILE||||FILE:asm/active/osc_shapes.s||
        .text
        .align 2
        .globl _osc_saw_block
        .globl _osc_square_block
        .globl _osc_triangle_block

// Shared constants
osc_TAU_const:          .float 6.2831855
osc_half_TAU:           .float 3.1415927
osc_two_const:          .float 2.0
osc_one_const:          .float 1.0
osc_neg_one_const:      .float -1.0

// All three oscillators share one signature:
//   void osc_xxx_block(osc_t *o, float *out, uint32_t n, float freq, float sr)
//   x0 = o (the phase is read from / written back to offset 0)
//   x1 = out, w2 = n, s0 = freq, s1 = sr
// They only touch caller-saved registers (x9, w3, s0-s7, s16-s18); the
// callee-saved v8-v15 are deliberately not used.

// Helper macro: phase_inc -> s2, TAU -> s3, current phase -> s5, i = 0 -> w3.
// Clobbers x9 and s4.
.macro PREP_PHASE
        adrp    x9, osc_TAU_const@PAGE
        add     x9, x9, osc_TAU_const@PAGEOFF
        ldr     s3, [x9]
        fdiv    s4, s3, s1                  // TAU / sr
        fmul    s2, s4, s0                  // phase_inc
        ldr     s5, [x0]                    // ph
        mov     w3, wzr                     // i = 0
.endm

// void osc_saw_block(osc_t*, float*, n, freq, sr)
// out[i] = 2 * (ph / TAU) - 1
_osc_saw_block:
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp
        PREP_PHASE
        fmov    s6, #2.0
        fmov    s7, #1.0
Lsaw_loop:
        cmp     w3, w2
        b.hs    Lsaw_done
        fdiv    s16, s5, s3                 // frac = ph / TAU
        fmul    s16, s16, s6
        fsub    s16, s16, s7                // out = 2*frac - 1
        str     s16, [x1], #4
        fadd    s5, s5, s2                  // ph += inc
        fcmpe   s5, s3
        b.lt    Lsaw_next
        fsub    s5, s5, s3                  // wrap once past TAU
Lsaw_next:
        add     w3, w3, #1
        b       Lsaw_loop
Lsaw_done:
        str     s5, [x0]                    // persist phase
        ldp     x29, x30, [sp], #16
        ret

// void osc_square_block(...)
// out[i] = (ph < TAU/2) ? 1 : -1
_osc_square_block:
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp
        PREP_PHASE
        adrp    x9, osc_half_TAU@PAGE
        add     x9, x9, osc_half_TAU@PAGEOFF
        ldr     s16, [x9]                   // half TAU
        fmov    s17, #1.0
        fmov    s18, #-1.0
Lsq_loop:
        cmp     w3, w2
        b.hs    Lsq_done
        fcmpe   s5, s16
        fcsel   s7, s17, s18, lt
        str     s7, [x1], #4
        fadd    s5, s5, s2
        fcmpe   s5, s3
        b.lt    Lsq_next
        fsub    s5, s5, s3
Lsq_next:
        add     w3, w3, #1
        b       Lsq_loop
Lsq_done:
        str     s5, [x0]
        ldp     x29, x30, [sp], #16
        ret

// void osc_triangle_block(...)
// out[i] = 2 * |2 * (ph / TAU) - 1| - 1
_osc_triangle_block:
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp
        PREP_PHASE
        fmov    s6, #2.0
        fmov    s7, #1.0
Ltr_loop:
        cmp     w3, w2
        b.hs    Ltr_done
        fdiv    s16, s5, s3                 // frac = ph / TAU
        fmul    s17, s16, s6
        fsub    s17, s17, s7                // temp = 2*frac - 1
        fabs    s17, s17
        fmul    s17, s17, s6
        fsub    s17, s17, s7                // val = 2*|temp| - 1
        str     s17, [x1], #4
        fadd    s5, s5, s2
        fcmpe   s5, s3
        b.lt    Ltr_next
        fsub    s5, s5, s3
Ltr_next:
        add     w3, w3, #1
        b       Ltr_loop
Ltr_done:
        str     s5, [x0]
        ldp     x29, x30, [sp], #16
        ret
||ENDFILE||||FILE:asm/active/osc_sine.s||
        .text
        .align 2
        .globl _osc_sine_block

// void osc_sine_block(osc_t *o, float *out, uint32_t n, float freq, float sr)
// x0=o, x1=out, w2=n, s0=freq, s1=sr
// Strategy: compute phase_inc = TAU*freq/sr once, then loop scalar using sinf.
//
// Everything that must survive the call to sinf lives either in callee-saved
// registers (x19 = o, x20 = out, w21 = samples remaining) or in the stack
// frame, because sinf is free to overwrite every caller-saved register.
//
// Frame layout (64 bytes):
//   [sp, #0]   x29, x30
//   [sp, #16]  x19, x20
//   [sp, #32]  x21, x22
//   [sp, #48]  phase   [sp, #52]  phase_inc   [sp, #56]  TAU
osc_sine_consts:
        .float 6.2831855                    // TAU

_osc_sine_block:
        stp     x29, x30, [sp, #-64]!
        mov     x29, sp
        stp     x19, x20, [sp, #16]
        stp     x21, x22, [sp, #32]

        mov     x19, x0                     // osc pointer
        mov     x20, x1                     // out pointer
        mov     w21, w2                     // samples remaining

        adrp    x9, osc_sine_consts@PAGE
        add     x9, x9, osc_sine_consts@PAGEOFF
        ldr     s3, [x9]                    // TAU

        // phase_inc = TAU * freq / sr
        fdiv    s4, s3, s1                  // TAU / sr
        fmul    s2, s4, s0                  // phase_inc

        ldr     s0, [x19]                   // current phase
        stp     s0, s2, [sp, #48]           // phase, phase_inc
        str     s3, [sp, #56]               // TAU

        cbz     w21, Lsine_done             // n == 0: skip the loop

Lsine_loop:
        ldr     s0, [sp, #48]
        bl      _sinf
        str     s0, [x20], #4               // store result and advance pointer

        // phase += phase_inc, wrapped once past TAU
        ldp     s0, s1, [sp, #48]           // phase, phase_inc
        ldr     s2, [sp, #56]               // TAU
        fadd    s0, s0, s1
        fcmpe   s0, s2
        b.lt    Lsine_nowrap
        fsub    s0, s0, s2
Lsine_nowrap:
        str     s0, [sp, #48]

        subs    w21, w21, #1
        b.ne    Lsine_loop

Lsine_done:
        // store phase back to osc structure
        ldr     s0, [sp, #48]
        str     s0, [x19]

        ldp     x21, x22, [sp, #32]
        ldp     x19, x20, [sp, #16]
        ldp     x29, x30, [sp], #64
        ret
||ENDFILE||||FILE:asm/active/sin4_ps_asm.s||
        .text
        .align 2
        .globl _sin4_ps_asm
        .private_extern sin4_ps_asm_internal

// 4-lane single-precision sine.
// In:    v0.4s = x
// Out:   v0.4s = approximately sin(x)
// Clobb: x9, v1-v7, v16-v19 (all caller-saved; no stack is used)
//
// Method: n = round(x / pi), r = x - n*pi, negate r when n is odd, then
//   sin(r) ~= r + r * z * (s1 + z*s2 + z^2*s3 + z^3*s4),  z = r*r
// with s1..s4 taken from the table (about -1/3!, 1/5!, -1/7!, 1/9!).
sin4_ps_asm_internal:
_sin4_ps_asm:
        adrp    x9, Lsin_const@PAGE
        add     x9, x9, Lsin_const@PAGEOFF
        ldp     q1, q2, [x9]                // inv_pi, pi
        ldp     q16, q17, [x9, #32]         // s2, s3
        ldp     q18, q19, [x9, #64]         // s4, s1

        // n = round-to-nearest-even(x * inv_pi)
        fmul    v3.4s, v0.4s, v1.4s
        frintn  v3.4s, v3.4s
        fcvtzs  v4.4s, v3.4s                // n as int32
        scvtf   v5.4s, v4.4s                // n back as float

        // r = x - n * pi
        fmul    v5.4s, v5.4s, v2.4s
        fsub    v0.4s, v0.4s, v5.4s

        // Odd n flips the sign: move bit 0 of n into the sign-bit position.
        shl     v4.4s, v4.4s, #31
        eor     v0.16b, v0.16b, v4.16b

        // Polynomial in z = r*r
        fmul    v6.4s, v0.4s, v0.4s         // z
        fmul    v7.4s, v6.4s, v6.4s         // z^2
        fmla    v16.4s, v6.4s, v17.4s       // p  = s2 + z*s3
        fmla    v16.4s, v7.4s, v18.4s       // p += z^2*s4
        fmla    v19.4s, v6.4s, v16.4s       // q  = s1 + z*p
        fmul    v19.4s, v19.4s, v6.4s       // w  = z*q
        fmla    v0.4s, v0.4s, v19.4s        // r += r*w
        ret

        // Constant table (16-byte aligned)
        .align 4
Lsin_const:
        // inv_pi (1/π)
        .float 0.31830988618379067154, 0.31830988618379067154, 0.31830988618379067154, 0.31830988618379067154
        // pi
        .float 3.14159265358979323846, 3.14159265358979323846, 3.14159265358979323846, 3.14159265358979323846
        // s2
        .float 8.3333337670e-3, 8.3333337670e-3, 8.3333337670e-3, 8.3333337670e-3
        // s3
        .float -1.9841270114e-4, -1.9841270114e-4, -1.9841270114e-4, -1.9841270114e-4
        // s4
        .float 2.7557314297e-6, 2.7557314297e-6, 2.7557314297e-6, 2.7557314297e-6
        // s1
        .float -1.6666664611e-1, -1.6666664611e-1, -1.6666664611e-1, -1.6666664611e-1
||ENDFILE||||FILE:asm/active/snare.s||
        .text
        .align 2
        .globl _snare_process

// Lightweight Snare – envelope recurrence + inline SplitMix64 noise
// void snare_process(snare_t *s, float *L, float *R, uint32_t n)
//   x0 = snare_t*
//   x1 = L buffer
//   x2 = R buffer
//   w3 = n samples
//
// Adds env * noise * AMP to L[i] and R[i] for i = 0 .. min(n, len - pos) - 1,
// then writes env, pos and rng.state back to the struct. Returns without
// touching anything when pos >= len or n == 0.
//
// snare_t layout (see snare.h)
//   uint32_t pos        @ 0
//   uint32_t len        @ 4
//   float    sr         @ 8   (unused here)
//   float    env        @ 12
//   float    env_coef   @ 16
//   (padding)           @ 20
//   uint64_t rng.state  @ 24
//
// Leaf routine using caller-saved registers only:
//   w6 = i, w8 = pos, w9 = len, x10 = rng state
//   x13/x14/x15 = GAMMA/MUL1/MUL2, x16 = z
//   s4 = env, s5 = env_coef, s16 = AMP, s17 = 1/2^24, s18 = 2.0, s19 = 1.0

// --- Struct Offsets ---
        .equ    S_POS, 0
        .equ    S_LEN, 4
        .equ    S_ENV, 12
        .equ    S_ENV_COEF, 16
        .equ    S_RNG_STATE, 24

// --- Constants ---
AMP_const:
        .float 0.4                          // overall amplitude
floats_inv24_two_one:
        .float 5.9604644775390625e-8        // 1/2^24
        .float 2.0
        .float 1.0
rng64_consts:
        .quad 0x9E3779B97F4A7C15            // GAMMA
        .quad 0xBF58476D1CE4E5B9            // MUL1
        .quad 0x94D049BB133111EB            // MUL2

// ------------------------------------------------------------
// Main routine
// ------------------------------------------------------------
_snare_process:
        stp     x29, x30, [sp, #-16]!
        mov     x29, sp

        // Early exit if inactive or n == 0 (pos/len/n are unsigned)
        ldr     w8, [x0, #S_POS]
        ldr     w9, [x0, #S_LEN]
        cmp     w8, w9
        b.hs    Lsn_done                    // already finished
        cbz     w3, Lsn_done

        // Load mutable state
        ldr     s4, [x0, #S_ENV]
        ldr     s5, [x0, #S_ENV_COEF]
        ldr     x10, [x0, #S_RNG_STATE]

        // Load constants
        adrp    x11, AMP_const@PAGE
        add     x11, x11, AMP_const@PAGEOFF
        ldr     s16, [x11]                  // AMP
        adrp    x11, floats_inv24_two_one@PAGE
        add     x11, x11, floats_inv24_two_one@PAGEOFF
        ldr     s17, [x11]                  // 1/2^24
        ldr     s18, [x11, #4]              // 2.0
        ldr     s19, [x11, #8]              // 1.0
        adrp    x11, rng64_consts@PAGE
        add     x11, x11, rng64_consts@PAGEOFF
        ldr     x13, [x11]                  // GAMMA
        ldr     x14, [x11, #8]              // MUL1
        ldr     x15, [x11, #16]             // MUL2

        mov     w6, wzr                     // i = 0

Lsn_loop:
        // Stop when i >= n or pos >= len
        cmp     w6, w3
        b.hs    Lsn_store
        cmp     w8, w9
        b.hs    Lsn_store

        fmul    s4, s4, s5                  // env *= env_coef

        // --- SplitMix64 ---
        add     x10, x10, x13               // state += GAMMA
        eor     x16, x10, x10, lsr #30
        mul     x16, x16, x14
        eor     x16, x16, x16, lsr #27
        mul     x16, x16, x15
        eor     x16, x16, x16, lsr #31      // final z

        // Bits 8..31 of z -> float in [-1,1)
        ubfx    w16, w16, #8, #24
        ucvtf   s0, w16
        fmul    s0, s0, s17                 // * 1/2^24
        fmul    s0, s0, s18                 // * 2
        fsub    s0, s0, s19                 // - 1

        // sample = env * noise * AMP
        fmul    s0, s0, s4
        fmul    s0, s0, s16

        // L[i] += sample
        ldr     s1, [x1, w6, uxtw #2]
        fadd    s1, s1, s0
        str     s1, [x1, w6, uxtw #2]
        // R[i] += sample
        ldr     s2, [x2, w6, uxtw #2]
        fadd    s2, s2, s0
        str     s2, [x2, w6, uxtw #2]

        add     w8, w8, #1                  // pos++
        add     w6, w6, #1                  // i++
        b       Lsn_loop

Lsn_store:
        // Store updated state back to struct
        str     s4, [x0, #S_ENV]
        str     w8, [x0, #S_POS]
        str     x10, [x0, #S_RNG_STATE]
Lsn_done:
        ldp     x29, x30, [sp], #16
        ret
||ENDFILE||
at txn 0x60799a356730705eb35500a65696934c881519893012fab111862bfbdde1a656 Aug-20-2025 12:42:47 AM UTC (28 days ago)

/ Restore stack b .Lrot_done .Lrot_scalar: // Scalar fallback for arbitrary sizes // Copy pattern to tmp buffer mov w4, wzr // w4 = loop counter for memcpy .Lrot_copy_loop: cmp w4, w2 b.ge .Lrot_rotate_start ldrb w5, [x0, w4, uxtw] // Load pattern[i] strb w5, [x1, w4, uxtw] // Store to tmp[i] add w4, w4, #1 b .Lrot_copy_loop .Lrot_rotate_start: // Rotate: pattern[i] = tmp[(i+rot) % size] mov w4, wzr // w4 = i (loop counter) .Lrot_rotate_loop: cmp w4, w2 b.ge .Lrot_done // Calculate src_index = (i + rot) % size add w5, w4, w3 // w5 = i + rot udiv w6, w5, w2 // w6 = (i + rot) / size msub w5, w6, w2, w5 // w5 = (i + rot) - (w6 * size) = (i + rot) % size // pattern[i] = tmp[src_index] ldrb w6, [x1, w5, uxtw] // Load tmp[src_index] strb w6, [x0, w4, uxtw] // Store to pattern[i] add w4, w4, #1 // i++ b .Lrot_rotate_loop .Lrot_done: ret /* * generator_build_events_asm - Pre-compute entire event queue in assembly * ---------------------------------------------------------------------- * void generator_build_events_asm(event_queue_t *q, rng_t *rng, * const uint8_t *kick_pat, const uint8_t *snare_pat, const uint8_t *hat_pat, * uint32_t step_samples); * * Converts the event queue building loop from C to assembly for ultimate performance. * This is the final orchestration step - building the complete musical timeline. * * Event generation rules: * - Drums: kick/snare/hat based on euclidean patterns * - Melody: triggers at specific bar positions (0, 8, 16, 24) * - Mid: stochastic triggers with 10% probability on certain beats * - Bass: triggers at beginning of each bar (step 0) * * Constants: * - TOTAL_STEPS = 32, STEPS_PER_BAR = 16 * - Event types: KICK=0, SNARE=1, HAT=2, MELODY=3, MID=4, FM_BASS=5 */ .globl _generator_build_events_asm_generator_build_events_asm: // Arguments: x0=q, x1=rng, x2=kick_pat, x3=snare_pat, x4=hat_pat, w5=step_samples // Save callee-saved registers stp x19, x20, [sp, #-80]! 
stp x21, x22, [sp, #16] stp x23, x24, [sp, #32] stp x25, x26, [sp, #48] stp x27, x28, [sp, #64] // Initialize event queue: q->count = 0 str wzr, [x0, #4096] // q->count = 0 (events array is 4096 bytes) // Register assignments for loop mov x19, x0 // x19 = q (event queue) mov x20, x1 // x20 = rng mov x21, x2 // x21 = kick_pat mov x22, x3 // x22 = snare_pat mov x23, x4 // x23 = hat_pat mov w24, w5 // w24 = step_samples mov w25, wzr // w25 = step (loop counter) // Constants mov w26, #32 // w26 = TOTAL_STEPS mov w27, #16 // w27 = STEPS_PER_BAR // RNG_FLOAT constants - removed unused constant .Lbuild_loop: // Check loop condition: step < TOTAL_STEPS cmp w25, w26 b.ge .Lbuild_done // Calculate t = step * step_samples mul w6, w25, w24 // w6 = t = step * step_samples // Calculate bar_step = step % STEPS_PER_BAR udiv w7, w25, w27 // w7 = step / STEPS_PER_BAR msub w8, w7, w27, w25 // w8 = bar_step = step - (w7 * STEPS_PER_BAR) // Check kick pattern: if(kick_pat[step % STEPS_PER_BAR]) ldrb w9, [x21, w8, uxtw] // w9 = kick_pat[bar_step] cbz w9, .Lcheck_snare // Push kick event: eq_push(q, t, EVT_KICK, 0) mov w10, #0 // EVT_KICK = 0 mov w11, #0 // aux = 0 bl _generator_eq_push_helper_asm .Lcheck_snare: // Check snare pattern: if(snare_pat[step % STEPS_PER_BAR]) ldrb w9, [x22, w8, uxtw] // w9 = snare_pat[bar_step] cbz w9, .Lcheck_hat // Push snare event: eq_push(q, t, EVT_SNARE, 0) mov w10, #1 // EVT_SNARE = 1 mov w11, #0 // aux = 0 bl _generator_eq_push_helper_asm .Lcheck_hat: // Check hat pattern: if(hat_pat[step % STEPS_PER_BAR]) ldrb w9, [x23, w8, uxtw] // w9 = hat_pat[bar_step] cbz w9, .Lcheck_melody // Push hat event: eq_push(q, t, EVT_HAT, 0) mov w10, #2 // EVT_HAT = 2 mov w11, #0 // aux = 0 bl _generator_eq_push_helper_asm .Lcheck_melody: // Check melody triggers: if(bar_step==0 || bar_step==8 || bar_step==16 || bar_step==24) cbz w8, .Lmelody_trigger // bar_step == 0 cmp w8, #8 b.eq .Lmelody_trigger cmp w8, #16 b.eq .Lmelody_trigger cmp w8, #24 b.eq .Lmelody_trigger b 
.Lcheck_mid .Lmelody_trigger: // Push melody event: eq_push(q, t, EVT_MELODY, bar_step/8) lsr w11, w8, #3 // w11 = aux = bar_step / 8 mov w10, #3 // EVT_MELODY = 3 bl _generator_eq_push_helper_asm .Lcheck_mid: // Check mid triggers: if(bar_step % 4 == 2 || ((bar_step%4==1 || bar_step%4==3) && RNG_FLOAT < 0.1)) and w9, w8, #3 // w9 = bar_step % 4 cmp w9, #2 b.eq .Lmid_trigger // bar_step % 4 == 2 // Check if bar_step % 4 == 1 or 3 cmp w9, #1 b.eq .Lmid_rng_check cmp w9, #3 b.ne .Lcheck_bass .Lmid_rng_check: // Generate RNG_FLOAT and compare with 0.1 bl _generator_rng_next_float_asm // Returns float in s0 // Compare with 0.1f mov w12, #0x3dcc movk w12, #0xcccd, lsl #16 // 0.1f in IEEE 754 fmov s1, w12 fcmp s0, s1 b.ge .Lcheck_bass // if RNG_FLOAT >= 0.1, skip .Lmid_trigger: // Generate random aux value: rng_next_u32() % 7 bl _generator_rng_next_u32_asm // Returns uint32_t in w0 mov w12, #7 udiv w13, w0, w12 msub w11, w13, w12, w0 // w11 = aux = w0 % 7 // Push mid event: eq_push(q, t, EVT_MID, aux) mov w10, #4 // EVT_MID = 4 bl _generator_eq_push_helper_asm .Lcheck_bass: // Check bass trigger: if(bar_step == 0) cbnz w8, .Lloop_next // Push bass event: eq_push(q, t, EVT_FM_BASS, 0) mov w10, #5 // EVT_FM_BASS = 5 mov w11, #0 // aux = 0 bl _generator_eq_push_helper_asm .Lloop_next: // Increment step and continue loop add w25, w25, #1 b .Lbuild_loop .Lbuild_done: // Restore callee-saved registers ldp x27, x28, [sp, #64] ldp x25, x26, [sp, #48] ldp x23, x24, [sp, #32] ldp x21, x22, [sp, #16] ldp x19, x20, [sp], #80 ret/* * Helper function: eq_push equivalent * Inputs: w6=time, w10=type, w11=aux * Uses: x19=q */ .globl _generator_eq_push_helper_asm_generator_eq_push_helper_asm: // Load current count ldr w12, [x19, #4096] // w12 = q->count // Check if count < MAX_EVENTS (512) cmp w12, #512 b.ge .Leq_push_ret // Skip if queue full // Calculate event address: &q->events[count] mov w13, #8 // sizeof(event_t) = 8 bytes mul w14, w12, w13 // w14 = count * sizeof(event_t) add x15, 
x19, w14, uxtw // x15 = &q->events[count] // Store event: {time, type, aux, padding} str w6, [x15] // event.time = time strb w10, [x15, #4] // event.type = type strb w11, [x15, #5] // event.aux = aux // Increment count add w12, w12, #1 str w12, [x19, #4096] // q->count++ .Leq_push_ret: ret/* * Helper function: rng_next_u32 equivalent * Inputs: x20=rng * Returns: w0=random uint32_t * Preserves: x20 (rng pointer) */ .globl _generator_rng_next_u32_asm _generator_rng_next_u32_asm: // Save link register and preserve registers stp x29, x30, [sp, #-16]! // Implementation of SplitMix64 algorithm // uint64_t z = (r->state += 0x9E3779B97F4A7C15ULL); ldr x0, [x20] // x0 = rng->state movz x1, #0x7C15, lsl #0 movk x1, #0x7F4A, lsl #16 movk x1, #0xB979, lsl #32 movk x1, #0x9E37, lsl #48 // x1 = 0x9E3779B97F4A7C15 add x0, x0, x1 // x0 = state + increment str x0, [x20] // rng->state = new state // z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL; lsr x1, x0, #30 eor x0, x0, x1 movz x1, #0xE5B9, lsl #0 movk x1, #0x1CE4, lsl #16 movk x1, #0x476D, lsl #32 movk x1, #0xBF58, lsl #48 // x1 = 0xBF58476D1CE4E5B9 mul x0, x0, x1 // z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL; lsr x1, x0, #27 eor x0, x0, x1 movz x1, #0x11EB, lsl #0 movk x1, #0x3311, lsl #16 movk x1, #0x49BB, lsl #32 movk x1, #0x94D0, lsl #48 // x1 = 0x94D049BB133111EB mul x0, x0, x1 // return z ^ (z >> 31); lsr x1, x0, #31 eor x0, x0, x1 // Return lower 32 bits mov w0, w0 // Restore and return ldp x29, x30, [sp], #16 ret/* * Helper function: rng_next_float equivalent * Inputs: x20=rng * Returns: s0=random float [0,1) */ .globl _generator_rng_next_float_asm_generator_rng_next_float_asm: // Save link register and floating-point context stp x29, x30, [sp, #-16]! 
// Call rng_next_u32 bl _generator_rng_next_u32_asm // w0 = random uint32_t // Implement: (rng_next_u32(r) >> 8) * (1.0f / 16777216.0f) lsr w0, w0, #8 // w0 = w0 >> 8 ucvtf s0, w0 // s0 = (float)w0 // Multiply by 1.0f / 16777216.0f = 5.960464477539063e-08 movz w1, #0x0000, lsl #0 movk w1, #0x3380, lsl #16 // IEEE 754 representation of 1.0f/16777216.0f fmov s1, w1 fmul s0, s0, s1 // s0 = s0 * (1.0f/16777216.0f) // Restore and return ldp x29, x30, [sp], #16 ret .section __TEXT,__cstring.Ldbg_fmt: .asciz "ASM: rem=%u proc=%u pos=%u\n".Ldbg_pre_fmt: .asciz "PRE: rem=%u step=%u pos=%u\n".Ldbg_post1_fmt: .asciz "P1: rem=%u proc=%u pos=%u\n" .Ldbg_rms_fmt: .asciz "RMSraw=%u\n" .Ldbg_scratch_fmt:.asciz "SCR drums=%u synth=%u n=%u\n".Loutput_silent_msg: .asciz "OUTPUT_BUFFER_SILENT\n".Loutput_audio_msg: .asciz "OUTPUT_BUFFER_HAS_AUDIO\n".Lmix_null_msg: .asciz "MIX_NULL_POINTER_ERROR\n"||ENDFILE||||FILE:asm/active/hat.s|| .text .align 2 .globl _hat_process// Lightweight Hat – fast-decay envelope + white noise (SplitMix64)// void hat_process(hat_t *h, float *L, float *R, uint32_t n)// x0 = hat_t*// x1 = L buffer// x2 = R buffer// w3 = n samples//// hat_t layout (see hat.h)// uint32_t pos @ 0// uint32_t len @ 4// float sr @ 8 (unused)// float env @ 12// float env_coef @ 16// rng_t rng @ 24 (state 64-bit)// --- Struct Offsets ---.equ H_POS, 0.equ H_LEN, 4.equ H_ENV, 12.equ H_ENV_COEF, 16.equ H_RNG_STATE, 24// --- Constants ---H_AMP_const: .float 0.15 // hat amplitudef_inv24_two_one: .float 5.9604644775390625e-8 // 1/2^24 .float 2.0 .float 1.0rng64_consts_hat: .quad 0x9E3779B97F4A7C15 // GAMMA .quad 0xBF58476D1CE4E5B9 // MUL1 .quad 0x94D049BB133111EB // MUL2// ------------------------------------------------------------_hat_process: // Prologue: minimal stack frame stp x29, x30, [sp, #-16]! mov x29, sp // Preserve callee-saved x22 str x22, [sp, #-16]! 
// Early exits
    // Body of hat_process (label and prologue precede this point).
    //   x0 = hat_t*, x1 = L, x2 = R, w3 = n
    // Register roles:
    //   w8 = pos, w9 = len, x10 = rng state, w22 = i (saved by the prologue)
    //   s4 = env, s5 = env_coef
    //   s16 = 1/2^24, s17 = 2.0, s18 = 1.0, s19 = AMP
    //   x5/x6/x7 = SplitMix64 GAMMA / MUL1 / MUL2
    // Fix: the constants used to live in s12-s15, which are callee-saved
    // under AAPCS64 and were never saved here; they now use caller-saved
    // s16-s19.  pos/len are uint32_t per the layout above, so the bounds
    // checks are unsigned.
    ldr     w8, [x0, #H_POS]
    ldr     w9, [x0, #H_LEN]
    cmp     w8, w9
    b.hs    Ldone                       // inactive (pos >= len)
    cbz     w3, Ldone                   // n == 0

    // Load state
    ldr     s4, [x0, #H_ENV]
    ldr     s5, [x0, #H_ENV_COEF]
    ldr     x10, [x0, #H_RNG_STATE]

    // Constants
    adrp    x11, H_AMP_const@PAGE
    add     x11, x11, H_AMP_const@PAGEOFF
    ldr     s19, [x11]                  // AMP
    adrp    x12, f_inv24_two_one@PAGE
    add     x12, x12, f_inv24_two_one@PAGEOFF
    ldr     s16, [x12]                  // 1/2^24
    ldr     s17, [x12, #4]              // 2.0
    ldr     s18, [x12, #8]              // 1.0
    adrp    x13, rng64_consts_hat@PAGE
    add     x13, x13, rng64_consts_hat@PAGEOFF
    ldp     x5, x6, [x13]               // GAMMA, MUL1 (loop-invariant)
    ldr     x7, [x13, #16]              // MUL2

    mov     w22, wzr                    // loop counter i

// --- Main Loop ---
Lloop:
    cmp     w22, w3
    b.hs    Lend
    cmp     w8, w9
    b.hs    Lend

    // env *= env_coef
    fmul    s4, s4, s5

    // SplitMix64: state += GAMMA; z = mix(state)
    add     x10, x10, x5
    eor     x15, x10, x10, lsr #30
    mul     x15, x15, x6
    eor     x15, x15, x15, lsr #27
    mul     x15, x15, x7
    eor     x15, x15, x15, lsr #31

    // noise = (float)(z32 >> 8) * 2^-24 * 2 - 1   -> [-1, 1)
    lsr     w16, w15, #8
    ucvtf   s0, w16
    fmul    s0, s0, s16
    fmul    s0, s0, s17
    fsub    s0, s0, s18

    // sample = env * noise * AMP
    fmul    s0, s0, s4
    fmul    s0, s0, s19

    // L[i] += sample
    ldr     s1, [x1, w22, uxtw #2]
    fadd    s1, s1, s0
    str     s1, [x1, w22, uxtw #2]
    // R[i] += sample
    ldr     s2, [x2, w22, uxtw #2]
    fadd    s2, s2, s0
    str     s2, [x2, w22, uxtw #2]

    // Advance
    add     w8, w8, #1
    add     w22, w22, #1
    b       Lloop

// --- Exit ---
Lend:
    str     s4, [x0, #H_ENV]
    str     w8, [x0, #H_POS]
    str     x10, [x0, #H_RNG_STATE]
Ldone:
    ldr     x22, [sp], #16
    ldp     x29, x30, [sp], #16
    ret
||ENDFILE||
||FILE:asm/active/kick.s||
    .text
    .align 2
    .globl _kick_process
// --- Constants ---
AMP_const: .float 0.9 // overall amplitude (reduced from 1.2 - was too loud)
// Offsets inside kick_t struct (see kick.h)
.equ K_SR, 0 // float sr
.equ K_POS, 4 // uint32 pos
.equ K_LEN, 8 // uint32 len
.equ K_ENV, 12 // float env
.equ K_ENV_COEF, 16 // float env_coef
.equ K_Y_PREV, 28 // float y_prev (sin(theta[n-1]))
.equ K_Y_PREV2, 32 // float y_prev2 (sin(theta[n-2]))
.equ K_K1, 36 // float k1 = 2*cos(delta)
// void kick_process(kick_t *k, float *L, float *R, uint32_t n)
// x0 = kick*, x1
//      = L*, x2 = R*, w3 = n
//
// Adds an exponentially decaying sine (two-term recurrence) to L and R.
// Register roles:
//   w9 = pos, w10 = len, w22 = i
//   s4 = env, s5 = env_coef, s6 = y_prev, s7 = y_prev2
//   s16 = k1, s17 = y, s18 = AMP
// Fix: k1 / y / AMP used to live in s8, s9 and s15, which are callee-saved
// under AAPCS64 and were never saved; they now use caller-saved s16-s18.
// pos/len are uint32, so the bounds checks are unsigned.
_kick_process:
    // Prologue (leaf function, no lib calls)
    stp     x29, x30, [sp, #-16]!
    mov     x29, sp
    // Preserve callee-saved x22 (used as loop counter)
    str     x22, [sp, #-16]!

    // early-out: inactive or n==0
    ldr     w9, [x0, #K_POS]
    ldr     w10, [x0, #K_LEN]
    cmp     w9, w10
    b.hs    Ldone                       // pos >= len
    cbz     w3, Ldone                   // n == 0

    // Load state into FP regs
    ldr     s4, [x0, #K_ENV]            // env
    ldr     s5, [x0, #K_ENV_COEF]       // env_coef
    ldr     s6, [x0, #K_Y_PREV]         // y_prev
    ldr     s7, [x0, #K_Y_PREV2]        // y_prev2
    ldr     s16, [x0, #K_K1]            // k1 = 2*cos(delta)

    // Load AMP constant once
    adrp    x11, AMP_const@PAGE
    add     x11, x11, AMP_const@PAGEOFF
    ldr     s18, [x11]

    mov     w22, wzr                    // i counter
Lloop:
    // Check end conditions
    cmp     w22, w3
    b.hs    Lend
    cmp     w9, w10
    b.hs    Lend

    // env *= env_coef
    fmul    s4, s4, s5

    // y = k1*y_prev - y_prev2 (kept unfused so rounding is unchanged)
    fmul    s17, s16, s6
    fsub    s17, s17, s7

    // sample = env * y * AMP
    fmul    s0, s4, s17
    fmul    s0, s0, s18

    // L[i] += sample
    ldr     s1, [x1, w22, uxtw #2]
    fadd    s1, s1, s0
    str     s1, [x1, w22, uxtw #2]
    // R[i] += sample
    ldr     s2, [x2, w22, uxtw #2]
    fadd    s2, s2, s0
    str     s2, [x2, w22, uxtw #2]

    // Update sine recurrence state
    fmov    s7, s6                      // y_prev2 = old y_prev
    fmov    s6, s17                     // y_prev  = y

    // Increment counters
    add     w9, w9, #1                  // pos++
    add     w22, w22, #1                // i++
    b       Lloop
Lend:
    // Store back updated state
    str     s4, [x0, #K_ENV]
    str     s6, [x0, #K_Y_PREV]
    str     s7, [x0, #K_Y_PREV2]
    str     w9, [x0, #K_POS]
Ldone:
    ldr     x22, [sp], #16
    ldp     x29, x30, [sp], #16
    ret
||ENDFILE||
||FILE:asm/active/limiter.s||
// AArch64 assembly implementation of soft-knee limiter
// void limiter_process(limiter_t *l, float *L, float *R, uint32_t n)
// l: x0 = pointer to limiter_t { attack_coeff, release_coeff, envelope, threshold, knee_width }
// L: x1 = left channel buffer
// R: x2 = right channel buffer
// n: w3 = number of samples
//
// Uses soft-knee compression with envelope follower
    .text
    .align 2
    .globl _limiter_process
limiter_consts: .float 20.0
    .float 2.0
    .float 10.0
    .float -0.5
    .float 1.0
_limiter_process:
    // Save
// registers we'll use
//
// Body of limiter_process (label precedes this point).
// Frame layout (96 bytes):
//   [sp+0]  x29,x30   [sp+16] x19,x20   [sp+32] x21,x22   [sp+88] x23
//   [sp+48..75] parameters / envelope kept in memory across libm calls
// Register roles: x19 = limiter*, x20 = L, x21 = R, w22 = n, w23 = i
// Fixes:
//   * x23 (callee-saved) was used as the loop counter without being saved.
//   * s8-s15 (callee-saved) were clobbered; only s0-s7 are used now, and
//     nothing is held in an FP register across the log10f / powf calls.
.equ LIM_ATTACK,   48
.equ LIM_RELEASE,  52
.equ LIM_THRESH,   56
.equ LIM_KNEE,     60
.equ LIM_ENV,      64
.equ LIM_KNEE_LO,  68                   // -knee_width/2
.equ LIM_KNEE_HI,  72                   // +knee_width/2
.equ LIM_SAVE_X23, 88
    stp     x29, x30, [sp, #-96]!
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    str     x23, [sp, #LIM_SAVE_X23]
    mov     x29, sp

    // Early exit if n == 0
    cbz     w3, Llim_done

    // Save arguments
    mov     x19, x0                     // limiter struct
    mov     x20, x1                     // L pointer
    mov     x21, x2                     // R pointer
    mov     w22, w3                     // n

    // Copy parameters to the frame so they survive libm calls
    ldr     s0, [x19]                   // attack_coeff
    ldr     s1, [x19, #4]               // release_coeff
    ldr     s2, [x19, #8]               // envelope
    ldr     s3, [x19, #12]              // threshold
    ldr     s4, [x19, #16]              // knee_width
    str     s0, [sp, #LIM_ATTACK]
    str     s1, [sp, #LIM_RELEASE]
    str     s3, [sp, #LIM_THRESH]
    str     s4, [sp, #LIM_KNEE]
    str     s2, [sp, #LIM_ENV]

    // Knee bounds
    fmov    s5, #-0.5
    fmul    s5, s4, s5                  // -knee_width/2
    str     s5, [sp, #LIM_KNEE_LO]
    fneg    s6, s5                      // +knee_width/2
    str     s6, [sp, #LIM_KNEE_HI]

    mov     w23, wzr                    // i = 0 (n > 0 here)
Llim_loop:
    // peak = max(|L[i]|, |R[i]|)
    ldr     s1, [x20, w23, uxtw #2]
    ldr     s2, [x21, w23, uxtw #2]
    fabs    s1, s1
    fabs    s2, s2
    fmax    s3, s1, s2

    // Envelope follower: env = peak + coeff * (env - peak)
    // coeff = attack when peak > env, otherwise release
    ldr     s4, [sp, #LIM_ENV]
    ldr     s5, [sp, #LIM_ATTACK]
    ldr     s6, [sp, #LIM_RELEASE]
    fcmp    s3, s4
    fcsel   s5, s5, s6, gt
    fsub    s7, s4, s3
    fmadd   s4, s5, s7, s3
    str     s4, [sp, #LIM_ENV]

    // overshoot_db = 20 * log10(env / thresh)
    ldr     s6, [sp, #LIM_THRESH]
    fdiv    s0, s4, s6
    bl      _log10f
    fmov    s1, #20.0
    fmul    s1, s0, s1                  // s1 = overshoot_db

    // gain_reduction_db (s2)
    fmov    s2, wzr                     // 0 below the knee
    ldr     s3, [sp, #LIM_KNEE_LO]
    fcmp    s1, s3
    b.le    2f                          // below knee: no reduction
    ldr     s4, [sp, #LIM_KNEE_HI]
    fcmp    s1, s4
    b.ge    1f                          // above knee: hard limit
    // Soft knee: (overshoot + knee/2)^2 / (2 * knee)
    fsub    s5, s1, s3
    fmul    s5, s5, s5
    ldr     s6, [sp, #LIM_KNEE]
    fmov    s7, #2.0
    fmul    s6, s7, s6
    fdiv    s2, s5, s6
    b       2f
1:
    fmov    s2, s1                      // hard limiting
2:
    // gain = powf(10, -gain_reduction_db / 20)
    fneg    s1, s2
    fmov    s3, #20.0
    fdiv    s1, s1, s3
    fmov    s0, #10.0
    bl      _powf

    // Apply gain only when it attenuates (gain < 1.0)
    fmov    s1, #1.0
    fcmp    s0, s1
    b.ge    3f
    ldr     s2, [x20, w23, uxtw #2]
    ldr     s3, [x21, w23, uxtw #2]
    fmul    s2, s2, s0
    fmul    s3, s3, s0
    str     s2, [x20, w23, uxtw #2]
    str     s3, [x21, w23, uxtw #2]
3:
    // Loop control
    add     w23, w23, #1
    cmp     w23, w22
    b.lo    Llim_loop

    // Store final envelope
    ldr     s2, [sp, #LIM_ENV]
    str     s2, [x19, #8]
Llim_done:
    // Restore registers
    ldr     x23, [sp, #LIM_SAVE_X23]
    ldp     x19, x20, [sp, #16]
    ldp     x21, x22, [sp, #32]
    ldp     x29, x30, [sp], #96
    ret
||ENDFILE||
||FILE:asm/active/melody.s||
    .text
    .align 2
    .globl _melody_process
// Simplified melody implementation avoiding libm calls
// Uses polynomial approximation for exponential decay
melody_constants:
    .float 6.2831853071795864769 // [0] TAU
    .float 5.0 // [4] DECAY_RATE
    .float 0.07 // [8] AMP
    .float 2.0 // [12] TWO
    .float 1.0 // [16] ONE
    .float 1.2 // [20] DRIVE_GAIN
    .float 1.5 // [24] SOFT_A
    .float 0.5 // [28] SOFT_B
_melody_process:
    // Prologue - simpler frame
    stp     x29, x30, [sp, #-80]!
mov     x29, sp
    // Body of melody_process (label and first stp precede this point).
    //   x0 = melody*, x1 = L, x2 = R, w3 = n
    // Register roles:
    //   x19 = melody*, x23 = L, x24 = R, w22 = n, w20 = pos, w21 = len, w4 = i
    //   s0 = phase, s1 = sr, s2 = freq, s3 = phase_inc
    //   s16 = SOFT_A, s17 = SOFT_B, s20 = TAU, s21 = DECAY_RATE, s22 = AMP,
    //   s23 = TWO, s24 = ONE, s25 = DRIVE_GAIN
    // Fixes:
    //   * The negative-wrap check used "fmov s16, wzr", overwriting SOFT_A,
    //     so after the first sample the 1.5*driven term was multiplied by
    //     zero.  The check now compares against the immediate #0.0.
    //     NOTE(review): this changes the rendered audio to match the
    //     documented soft-clip formula.
    //   * Constants and temporaries no longer use s8-s15, which are
    //     callee-saved under AAPCS64 and were not saved.
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    stp     x23, x24, [sp, #48]

    // Save arguments
    mov     x19, x0                     // melody*
    mov     x23, x1                     // L*
    mov     x24, x2                     // R*
    mov     w22, w3                     // n

    // Load melody struct members
    ldr     s0, [x19]                   // phase
    ldr     w20, [x19, #4]              // pos
    ldr     w21, [x19, #8]              // len
    ldr     s1, [x19, #12]              // sr
    ldr     s2, [x19, #16]              // freq

    // Early exit checks
    cmp     w20, w21
    b.ge    Ldone
    cbz     w22, Ldone

    // Load constants
    adrp    x4, melody_constants@PAGE
    add     x4, x4, melody_constants@PAGEOFF
    ldr     s20, [x4]                   // TAU
    ldr     s21, [x4, #4]               // DECAY_RATE
    ldr     s22, [x4, #8]               // AMP
    ldr     s23, [x4, #12]              // TWO
    ldr     s24, [x4, #16]              // ONE
    ldr     s25, [x4, #20]              // DRIVE_GAIN
    ldr     s16, [x4, #24]              // SOFT_A
    ldr     s17, [x4, #28]              // SOFT_B

    // phase_inc = TAU * freq / sr
    fmul    s3, s20, s2
    fdiv    s3, s3, s1

    mov     w4, wzr                     // loop counter i
Lloop:
    // Loop bounds check
    cmp     w4, w22
    b.ge    Lend
    cmp     w20, w21
    b.ge    Lend

    // t = pos / sr
    ucvtf   s4, w20
    fdiv    s4, s4, s1

    // env = 1.0 / (1.0 + decay_rate * t)
    fmul    s5, s21, s4
    fadd    s5, s24, s5
    fdiv    s5, s24, s5

    // raw sawtooth = 2 * (phase / TAU) - 1
    fdiv    s6, s0, s20
    fmul    s7, s6, s23
    fsub    s7, s7, s24

    // driven = DRIVE_GAIN * raw
    fmul    s6, s25, s7

    // soft = SOFT_A*driven - SOFT_B*driven^3
    fmul    s7, s6, s6
    fmul    s7, s7, s6                  // driven^3
    fmul    s18, s16, s6
    fmul    s19, s17, s7
    fsub    s18, s18, s19

    // sample = soft * env * amp
    fmul    s18, s18, s5
    fmul    s18, s18, s22

    // Add to L[i] and R[i]
    ldr     s19, [x23, w4, sxtw #2]
    fadd    s19, s19, s18
    str     s19, [x23, w4, sxtw #2]
    ldr     s19, [x24, w4, sxtw #2]
    fadd    s19, s19, s18
    str     s19, [x24, w4, sxtw #2]

    // Update counters and phase
    add     w4, w4, #1                  // i++
    add     w20, w20, #1                // pos++
    fadd    s0, s0, s3                  // phase += phase_inc

    // Wrap phase in both directions
    fcmp    s0, s20
    b.lt    1f
    fsub    s0, s0, s20                 // phase -= TAU if phase >= TAU
    b       Lloop
1:
    fcmp    s0, #0.0                    // no register needed for zero
    b.ge    Lloop
    fadd    s0, s0, s20                 // phase += TAU if phase < 0
    b       Lloop
Lend:
    // Store back state
    str     s0, [x19]                   // phase
    str     w20, [x19, #4]              // pos
Ldone:
    // Epilogue
    ldp     x23, x24, [sp, #48]
    ldp     x21, x22, [sp, #32]
    ldp     x19, x20, [sp, #16]
    ldp     x29, x30, [sp], #80
    ret
||ENDFILE||
at txn 0x5ad3952994bcbb53d81958c303cd1176ae43fb2ebd3d24f6694329c0fcd8f140 Aug-20-2025 12:42:35 AM UTC (28 days ago)

||FILE:asm/active/generator.s||
    .text
    .align 2
    .globl _generator_mix_buffers_asm
// Assembly stubs – we override only generator_process; keep C generator_init
    .globl _generator_process
    // (no _generator_init symbol here; C version remains)

// -----------------------------------------------------------------------------
// generator_process(g, L, R, num_frames)
//   x0 = g (generator*), x1 = L, x2 = R, w3 = num_frames
//
// Renders num_frames of audio in step-aligned slices: triggers events at the
// start of each musical step, runs the voices into heap scratch buffers
// (Ld/Rd = drums, Ls/Rs = synths) and mixes them into L/R.
//
// Register roles (all callee-saved, so they survive every call):
//   x19 = L, x20 = R, x24 = g
//   w21 = frames_rem, w22 = frames_to_process, w23 = frames_done
//   x25..x28 = Ld, Rd, Ls, Rs scratch bases
// Volatile temporaries (w8 pos_in_step, w9 step_samples, x10 event base) are
// reloaded from the struct after every call.
//
// Fixes relative to the previous version:
//   * frames_to_process was passed from w11 after earlier calls;
//     _kick_process overwrites x11.  The count now lives in w22.
//   * pos_in_step was kept in w8 across _generator_mix_buffers_asm, which
//     overwrites w8; it is now reloaded from the struct.
//   * step_samples (w9) was compared after calls without a reload.
//   * TOTAL_STEPS lived in w13, which was overwritten by the Ld slice
//     pointer; the wrap compare now uses an immediate.
//   * Slice pointers were held in x13-x18 across calls; x18 is reserved on
//     Apple platforms.  Arguments are now computed directly from the
//     callee-saved bases.
//   * num_frames is taken from w3 (upper bits of x3 are undefined).
//   * The disabled ".if 0" debug probes were removed: they were never
//     assembled and depended on the scratch registers above.
// NOTE(review): _snare_process, _fm_voice_process and
// _generator_trigger_step are not visible here; AAPCS64 is assumed.
// -----------------------------------------------------------------------------
.equ G_STEP_SAMPLES, 12                 // g->step_samples
.equ G_KICK,         56
.equ G_SNARE,        96
.equ G_MELODY,       160
.equ G_MID_FM,       180
.equ G_BASS_FM,      220
.equ G_EVENT_HI,     0x1000             // event/state block = g + 0x1000 + 0x128
.equ G_EVENT_LO,     0x128              //                   = g + 4392
.equ EV_IDX,         0                  // event_idx
.equ EV_STEP,        4                  // step
.equ EV_POS,         8                  // pos_in_step
.equ TOTAL_STEPS,    32

_generator_process:
    // Prologue – 128-byte fixed frame; offsets 96..127 are spare
    stp     x29, x30, [sp, #-128]!
    mov     x29, sp
    stp     x19, x20, [sp, #16]
    stp     x21, x22, [sp, #32]
    stp     x23, x24, [sp, #48]
    stp     x25, x26, [sp, #64]
    stp     x27, x28, [sp, #80]

    // Early exit if no frames
    cbz     w3, .Lgp_epilogue

    mov     x19, x1                     // L
    mov     x20, x2                     // R
    mov     x24, x0                     // g
    mov     w21, w3                     // frames_rem (zero-extended)

    // ---------------------------------------------------------------------
    // Allocate one contiguous scratch block on the heap:
    // scratch_size = (num_frames * 16 + 15) & ~15   (4 float buffers)
    lsl     x0, x21, #4
    add     x0, x0, #15
    bic     x0, x0, #15
    bl      _malloc
    mov     x25, x0                     // Ld (scratch start)
    cbz     x25, .Lgp_epilogue          // malloc failed: leave L/R untouched

    add     x26, x25, x21, lsl #2       // Rd
    add     x27, x26, x21, lsl #2       // Ls
    add     x28, x27, x21, lsl #2       // Rs

    // Zero all four scratch buffers
    mov     x0, x25
    mov     x1, x26
    mov     x2, x27
    mov     x3, x28
    mov     w4, w21
    bl      _generator_clear_buffers_asm

    // ---------------------------------------------------------------------
    // L/R are not cleared here: the mix overwrites every frame.
    // ---------------------------------------------------------------------
    mov     w23, wzr                    // frames_done = 0

.Lgp_loop:
    cbz     w21, .Lgp_after_loop        // frames_rem == 0 ? done

    add     x10, x24, #G_EVENT_HI
    add     x10, x10, #G_EVENT_LO       // x10 = &g->event_idx
    ldr     w8, [x10, #EV_POS]          // pos_in_step

    // Trigger events only at the start of a step
    cbnz    w8, .Lgp_trigger_skip
    mov     x0, x24
    bl      _generator_trigger_step
    mov     w8, wzr                     // still at step start (w8 is volatile)
.Lgp_trigger_skip:
    ldr     w9, [x24, #G_STEP_SAMPLES]

    // frames_to_step_boundary = step_samples - pos_in_step
    sub     w10, w9, w8

    // FM sustain fix: at pos_in_step == 0 with more than one frame to the
    // boundary, shorten the slice by one so voices (fm_voice in particular)
    // spread notes over at least two slices.  Mirrors generator.c.
    cbnz    w8, 1f
    cmp     w10, #1
    b.le    1f
    sub     w10, w10, #1
1:
    // frames_to_process = min(frames_rem, frames_to_step_boundary)
    cmp     w21, w10
    csel    w22, w21, w10, lo

    // ---------- Voice processing (x23 << 2 = byte offset of this slice) ----
    add     x0, x24, #G_KICK
    add     x1, x25, x23, lsl #2        // Ld
    add     x2, x26, x23, lsl #2        // Rd
    mov     w3, w22
    bl      _kick_process

    add     x0, x24, #G_SNARE
    add     x1, x25, x23, lsl #2        // Ld
    add     x2, x26, x23, lsl #2        // Rd
    mov     w3, w22
    bl      _snare_process

    add     x0, x24, #G_MELODY
    add     x1, x27, x23, lsl #2        // Ls
    add     x2, x28, x23, lsl #2        // Rs
    mov     w3, w22
    bl      _melody_process

    add     x0, x24, #G_MID_FM
    add     x1, x27, x23, lsl #2        // Ls
    add     x2, x28, x23, lsl #2        // Rs
    mov     w3, w22
    bl      _fm_voice_process

    add     x0, x24, #G_BASS_FM
    add     x1, x27, x23, lsl #2        // Ls
    add     x2, x28, x23, lsl #2        // Rs
    mov     w3, w22
    bl      _fm_voice_process

    // ---------- Mix drums + synths into the output slice ----------
    add     x0, x19, x23, lsl #2        // L out
    add     x1, x20, x23, lsl #2        // R out
    add     x2, x25, x23, lsl #2        // Ld
    add     x3, x26, x23, lsl #2        // Rd
    add     x4, x27, x23, lsl #2        // Ls
    add     x5, x28, x23, lsl #2        // Rs
    mov     w6, w22
    bl      _generator_mix_buffers_asm

    // ---------- Advance counters (reload everything volatile) ----------
    add     x10, x24, #G_EVENT_HI
    add     x10, x10, #G_EVENT_LO
    ldr     w8, [x10, #EV_POS]
    ldr     w9, [x24, #G_STEP_SAMPLES]
    add     w8, w8, w22                 // pos_in_step += frames_to_process
    str     w8, [x10, #EV_POS]
    sub     w21, w21, w22               // frames_rem  -= frames_to_process
    add     w23, w23, w22               // frames_done += frames_to_process

    // Step boundary reached?
    cmp     w8, w9
    b.lo    .Lgp_loop

    // Boundary: reset pos_in_step and advance step (wrap at TOTAL_STEPS)
    str     wzr, [x10, #EV_POS]
    ldr     w12, [x10, #EV_STEP]
    add     w12, w12, #1
    cmp     w12, #TOTAL_STEPS
    b.lt    3f
    mov     w12, wzr
    str     wzr, [x10, #EV_IDX]         // event_idx reset
3:
    str     w12, [x10, #EV_STEP]
    b       .Lgp_loop

.Lgp_after_loop:
    // pos_in_step was already written back inside the loop.
    // Deallocate scratch (free)
    mov     x0, x25
    bl      _free
    // TEMP: Skip delay & limiter to test if they're clearing audio
    b       .Lgp_epilogue

    // ---------------------------------------------------------------------
    // Slice-5: Apply Delay & Limiter (currently unreachable, see TEMP above)
    // delay_process_block(&g->delay, L, R, num_frames, 0.45f);
    // limiter_process(&g->limiter, L, R, num_frames);
    // x24 = g, x19 = L, x20 = R, w23 = total num_frames
    // NOTE(review): "#ifndef" needs the C preprocessor; a lowercase .s file
    // is normally not preprocessed – confirm the build before re-enabling.
    add     x0, x24, #4096              // base offset
    add     x0, x0, #312                // 4096 + 312 = 4408 = &g->delay
    mov     x1, x19                     // L
    mov     x2, x20                     // R
    mov     w3, w23                     // n = num_frames
    // s0 = 0.45f (IEEE-754 0x3EE66666)
    mov     w4, #0x6666
    movk    w4, #0x3EE6, lsl #16
    fmov    s0, w4
    bl      _delay_process_block
#ifndef SKIP_LIMITER
    add     x0, x24, #4096              // base offset
    add     x0, x0, #328                // 4096 + 328 = 4424 = &g->limiter
    mov     x1, x19                     // L
    mov     x2, x20                     // R
    mov     w3, w23                     // n = num_frames
    bl      _limiter_process
#endif

.Lgp_epilogue:
    // Shared exit; on the early-exit paths no scratch was allocated.
    ldp     x27, x28, [x29, #80]
    ldp     x25, x26, [x29, #64]
    ldp     x23, x24, [x29, #48]
    ldp     x21, x22, [x29, #32]
    ldp     x19, x20, [x29, #16]
    ldp     x29, x30, [sp], #128        // pop full 128-byte frame
    ret

/*
 * generator_mix_buffers_asm - NEON vectorized buffer mixing
 * -------------------------------------------------------
 * void generator_mix_buffers_asm(float *L, float *R,
 *                                const float *Ld, const float *Rd,
 *                                const float *Ls, const float *Rs,
 *                                uint32_t num_frames);
 *
 * Performs: L[i] = Ld[i] + Ls[i] (drums + synths)
 *           R[i] = Rd[i] + Rs[i] (drums + synths)
 *
 * Uses NEON to process 4 samples per iteration for maximum throughput.
 * This is the hot path that runs every audio frame in real-time.
*/
_generator_mix_buffers_asm:
    // In:      x0 = L, x1 = R, x2 = Ld, x3 = Rd, x4 = Ls, x5 = Rs, w6 = num_frames
    // Clobbers: x0-x5 (advanced past the data), w7, w8, v0-v5, flags
    cbz     w6, .Lmix_ret               // nothing to do for zero frames

    lsr     w7, w6, #2                  // w7 = number of whole 4-sample groups
    and     w8, w6, #3                  // w8 = leftover samples (0-3)
    cbz     w7, .Lmix_tail

.Lmix_vec:
    // Four samples from each source, post-incrementing every pointer
    ldr     q0, [x2], #16               // Ld[i..i+3]
    ldr     q1, [x3], #16               // Rd[i..i+3]
    ldr     q2, [x4], #16               // Ls[i..i+3]
    ldr     q3, [x5], #16               // Rs[i..i+3]
    fadd    v4.4s, v0.4s, v2.4s         // drums + synths, left
    fadd    v5.4s, v1.4s, v3.4s         // drums + synths, right
    str     q4, [x0], #16               // L[i..i+3]
    str     q5, [x1], #16               // R[i..i+3]
    subs    w7, w7, #1
    b.ne    .Lmix_vec

.Lmix_tail:
    cbz     w8, .Lmix_ret
.Lmix_tail_loop:
    // One sample at a time for the remainder
    ldr     s0, [x2], #4                // Ld[i]
    ldr     s1, [x3], #4                // Rd[i]
    ldr     s2, [x4], #4                // Ls[i]
    ldr     s3, [x5], #4                // Rs[i]
    fadd    s4, s0, s2
    fadd    s5, s1, s3
    str     s4, [x0], #4                // L[i]
    str     s5, [x1], #4                // R[i]
    subs    w8, w8, #1
    b.ne    .Lmix_tail_loop

.Lmix_ret:
    ret
.Lmix_null_error:
    // Safe debug message for NULL pointer error
    stp     x0, x1, [sp, #-16]!
adrp    x0, .Lmix_null_msg@PAGE
    add     x0, x0, .Lmix_null_msg@PAGEOFF
    mov     w1, #22
    mov     x2, #2 // stderr
    mov     x16, #4 // sys_write
    svc     #0x80
    ldp     x0, x1, [sp], #16
    ret

/*
 * generator_compute_rms_asm - NEON vectorized RMS calculation
 * ---------------------------------------------------------
 * float generator_compute_rms_asm(const float *L, const float *R, uint32_t num_frames);
 *
 * Returns sqrt(sum(L[i]^2 + R[i]^2) / (num_frames * 2)), or 0.0 when
 * num_frames is zero.
 *
 * Whole groups of four frames are squared and accumulated lane-wise in
 * v16; the 0-3 leftover frames go into the scalar accumulator s17.  The
 * lanes are then reduced with two pairwise adds and combined with s17.
 *
 * In:       x0 = L, x1 = R, w2 = num_frames
 * Out:      s0 = RMS
 * Clobbers: x0, x1, w3, w4, v0-v4, v16-v18, flags
 */
    .globl _generator_compute_rms_asm
_generator_compute_rms_asm:
    cbz     w2, .Lrms_empty             // no frames -> 0.0

    movi    v16.4s, #0                  // lane-wise running sum
    fmov    s17, wzr                    // running sum for the tail

    lsr     w3, w2, #2                  // whole 4-frame groups
    and     w4, w2, #3                  // leftover frames (0-3)
    cbz     w3, .Lrms_tail

.Lrms_vec:
    ldr     q0, [x0], #16               // L[i..i+3]
    ldr     q1, [x1], #16               // R[i..i+3]
    fmul    v2.4s, v0.4s, v0.4s         // L^2
    fmul    v3.4s, v1.4s, v1.4s         // R^2
    fadd    v4.4s, v2.4s, v3.4s         // L^2 + R^2
    fadd    v16.4s, v16.4s, v4.4s
    subs    w3, w3, #1
    b.ne    .Lrms_vec

.Lrms_tail:
    cbz     w4, .Lrms_reduce
.Lrms_tail_loop:
    ldr     s0, [x0], #4                // L[i]
    ldr     s1, [x1], #4                // R[i]
    fmul    s2, s0, s0
    fmul    s3, s1, s1
    fadd    s4, s2, s3
    fadd    s17, s17, s4
    subs    w4, w4, #1
    b.ne    .Lrms_tail_loop

.Lrms_reduce:
    // Horizontal sum of v16: [a,b,c,d] -> (a+b) + (c+d)
    faddp   v18.4s, v16.4s, v16.4s
    faddp   s0, v18.2s
    fadd    s0, s0, s17                 // add the tail contribution

    // mean = sum / (num_frames * 2)
    ucvtf   s1, w2
    fmov    s2, #2.0
    fmul    s1, s1, s2
    fdiv    s0, s0, s1

    fsqrt   s0, s0                      // RMS
    ret

.Lrms_empty:
    fmov    s0, wzr
    ret

/*
 * generator_clear_buffers_asm - NEON vectorized buffer clearing
 * -----------------------------------------------------------
 * void generator_clear_buffers_asm(float *Ld, float *Rd, float *Ls, float *Rs, uint32_t num_frames);
 *
 * Clears (zeros) all 4 float buffers using NEON vector stores.
 * Replaces 4 memset() calls with optimized NEON operations.
 *
 * Uses NEON to process 4 samples per iteration for maximum throughput.
*/
    .globl _generator_clear_buffers_asm
_generator_clear_buffers_asm:
    // In:       x0 = Ld, x1 = Rd, x2 = Ls, x3 = Rs, w4 = num_frames
    // Clobbers: x0-x3 (advanced past the data), w5, w6, v0, v1, flags
    cbz     w4, .Lclr_ret               // zero frames: nothing to clear

    movi    v0.4s, #0                   // four 0.0f lanes
    lsr     w5, w4, #2                  // whole 4-sample groups
    and     w6, w4, #3                  // leftover samples (0-3)
    cbz     w5, .Lclr_tail

.Lclr_vec:
    // 16 bytes of zeros into each of the four buffers
    str     q0, [x0], #16               // Ld
    str     q0, [x1], #16               // Rd
    str     q0, [x2], #16               // Ls
    str     q0, [x3], #16               // Rs
    subs    w5, w5, #1
    b.ne    .Lclr_vec

.Lclr_tail:
    cbz     w6, .Lclr_ret
    fmov    s1, wzr                     // scalar 0.0f
.Lclr_tail_loop:
    str     s1, [x0], #4                // Ld[i] = 0.0
    str     s1, [x1], #4                // Rd[i] = 0.0
    str     s1, [x2], #4                // Ls[i] = 0.0
    str     s1, [x3], #4                // Rs[i] = 0.0
    subs    w6, w6, #1
    b.ne    .Lclr_tail_loop

.Lclr_ret:
    ret

/*
 * generator_rotate_pattern_asm - NEON vectorized pattern rotation
 * -------------------------------------------------------------
 * void generator_rotate_pattern_asm(uint8_t *pattern, uint8_t *tmp, uint32_t size, uint32_t rot);
 *
 * Rotates a uint8_t array by 'rot' positions: pattern[i] = old_pattern[(i+rot) % size]
 * Optimized for size=16 (STEPS_PER_BAR) using NEON EXT instruction.
 *
 * For size=16: Single NEON register holds entire pattern, EXT performs rotation in one operation.
*/ .globl _generator_rotate_pattern_asm_generator_rotate_pattern_asm: // Arguments: x0=pattern, x1=tmp, w2=size, w3=rot // Early exit if no rotation needed cbz w3, .Lrot_done // Optimize for STEPS_PER_BAR = 16 case cmp w2, #16 b.eq .Lrot_neon16 // Fallback for other sizes: scalar implementation b .Lrot_scalar .Lrot_neon16: // NEON optimization for 16-byte patterns (STEPS_PER_BAR) // Load entire 16-byte pattern into single NEON register ld1 {v0.16b}, [x0] // Build rotation index table: [rot, rot+1, rot+2, ..., rot+15] % 16 and w3, w3, #15 // Ensure rot is 0-15 (rot % 16) // Build index table on stack sub sp, sp, #16 // Allocate 16 bytes on stack mov w4, wzr // w4 = loop counter .Lrot_build_indices: add w5, w4, w3 // w5 = i + rot and w5, w5, #15 // w5 = (i + rot) % 16 strb w5, [sp, w4, uxtw] // Store index to stack add w4, w4, #1 // i++ cmp w4, #16 b.lt .Lrot_build_indices // Load index table and use TBL for rotation ld1 {v2.16b}, [sp] // Load index table tbl v1.16b, {v0.16b}, v2.16b // Perform table lookup rotation // Store rotated pattern back and clean up stack st1 {v1.16b}, [x0] add sp, sp, #16 /
at txn 0x365a9d8a78918ff1c5633072be239db1053a2f5cd68a68ea419cabebdecfc5ac Aug-20-2025 12:42:23 AM UTC (28 days ago)

||NOTDEAFBEEF_MASTER_START||
NotDeafBeef On-Chain Assembly Audio-Visual Generator
=====================================================
This bundle contains the complete source code for generating
audio-visual NFTs from ARM64 assembly. The code is deterministic:
same seed = identical output.
RECONSTRUCTION INSTRUCTIONS:
1. Download all chunks from blockchain transactions 
2. Save each as chunk_00.txt, chunk_01.txt, ..., chunk_XX.txt
3. Extract Python script: grep -A 90 "||FILE:extract.py||" chunk_26.txt | sed '1d' | sed '/||ENDFILE||/,$d' > extract.py
4. Run: python3 extract.py
5. Edit seed.s with your token's 32-byte seed
6. Run: ./build.sh
7. Output: nft_audio.wav + nft_final.mp4
TOTAL FILES: 128
||FILE:Makefile||
# NotDeafbeef - Root Build System
# Drives both the C and the assembly implementations.

# Default goal: the stable C build
all: c-build

# C implementation (stable)
c-build:
	$(MAKE) -C src/c

# Timeline JSON for one seed
# (usage: make export_timeline SEED=0xDEADBEEF OUT=path.json)
export_timeline:
	$(MAKE) -C src/c bin/export_timeline
	cd src/c && ./bin/export_timeline $(SEED) $(OUT)

# Visual assembly objects, each assembled from asm/visual/<name>.s
VISUAL_OBJS = visual_core.o drawing.o ascii_renderer.o particles.o bass_hits.o terrain.o glitch_system.o

$(VISUAL_OBJS): %.o: asm/visual/%.s
	gcc -c $< -o $@

# Visual system with the ASM components (needs SDL2)
vis-build: $(VISUAL_OBJS)
	mkdir -p bin
	gcc -o bin/vis_main src/vis_main.c src/visual_c_stubs.c src/audio_visual_bridge.c src/wav_reader.c $(VISUAL_OBJS) -Iinclude $(shell pkg-config --cflags --libs sdl2) -lm

# Frame generator (no SDL2 required)
generate_frames: $(VISUAL_OBJS)
	gcc -o generate_frames generate_frames.c src/audio_visual_bridge.c src/deterministic_prng.c src/timeline_reader.c simple_wav_reader.c $(VISUAL_OBJS) -Iinclude -Isrc/include -lm

# Audio system only (for protection verification)
audio:
	$(MAKE) -C src/c segment USE_ASM=1 VOICE_ASM="GENERATOR_ASM KICK_ASM SNARE_ASM HAT_ASM MELODY_ASM LIMITER_ASM"

# Test audio files
test-audio:
	python3 tools/generate_test_wavs.py

# Comprehensive WAV tests for every sound, in both C and ASM
test-comprehensive:
	python3 tools/generate_comprehensive_tests.py

# C vs ASM WAV comparison
compare:
	python3 tools/compare_c_vs_asm.py

# Audition one sound (usage: make play SOUND=kick)
play:
ifndef SOUND
	@echo "Usage: make play SOUND=<sound_name>"
	@echo "Example: make play SOUND=kick"
else
	python3 tools/compare_c_vs_asm.py --play $(SOUND)
endif

# Test suite
test:
	pytest tests/

# Remove all build artifacts
clean:
	$(MAKE) -C src/c clean
	rm -rf output/
	find . -name "*.o" -delete
	find . -name "*.dSYM" -delete
	rm -f generate_frames 2>/dev/null || true

# Demo audio segment
demo:
	$(MAKE) -C src/c segment
	@echo "Generated demo audio: src/c/seed_0xcafebabe.wav"

# Quick check that everything works
verify: c-build test-audio
	@echo "✅ NotDeafbeef verification complete!"

# Full check including the comprehensive tests
verify-full: c-build test-comprehensive compare
	@echo "✅ NotDeafbeef full verification complete!"
@echo "Check the comparison output above for any issues.".PHONY: all c-build vis-build audio test-audio test-comprehensive compare play test clean demo verify verify-full||ENDFILE||||FILE:asm/active/delay.s||.text.align 2.globl _delay_process_block// -----------------------------------------------------------------------------// void delay_process_block(delay_t *d, float *L, float *R, uint32_t n, float feedback)// x0 = delay_t* { float *buf; uint32_t size; uint32_t idx; }// x1 = L buffer// x2 = R buffer// w3 = n samples// s0 = feedback amount// Stereo ping-pong delay: L feeds R, R feeds L// -----------------------------------------------------------------------------_delay_process_block: // Prologue – use 512-byte frame; save x27/x28 at offset #480 (512-64) giving // the maximum distance LLDP allows (>=480 <=504) to avoid overlapping caller // memory even in fast execution. stp x29, x30, [sp, #-512]! stp q8, q9, [sp, #112] stp q10, q11, [sp, #144] stp q12, q13, [sp, #176] stp q14, q15, [sp, #208] mov x29, sp stp x19, x20, [sp, #16] stp x21, x22, [sp, #32] stp x23, x24, [sp, #48] stp x25, x26, [sp, #64] stp x27, x28, [sp, #480] // Load struct members (buf,size,idx) into convenient regs ldr x4, [x0] // buf* ldr w5, [x0, #8] // size ldr w6, [x0, #12] // idx // Early-out if n==0 cbz w3, Ldone // --- PRE-WRAP BUG FIX ---------------------------------------------------- // Make absolutely sure idx is in range BEFORE first buffer access. cmp w6, w5 // idx >= size ? 
csel w6, wzr, w6, hs// if so wrap to 0 // ------------------------------------------------------------------------ mov w7, wzr // loop counter iLloop: // Break conditions cmp w7, w3 b.hs Lstore_idx // i >= n → exit loop // Calculate &buf[idx*2] // (each sample is float = 4 bytes, stereo interleaved) // addr = buf + idx*8 lsl w8, w6, #3 // w8 = idx*8 add x9, x4, x8 // x9 = &buf[idx*2] // Load delayed samples ldp s1, s2, [x9] // s1 = yl, s2 = yr // Load input samples (post-increment L/R ptrs) ldr s3, [x1] // L[i] ldr s4, [x2] // R[i] // buf[idx*2] = L + yr*feedback fmadd s5, s2, s0, s3 // buf[idx*2+1] = R + yl*feedback fmadd s6, s1, s0, s4 stp s5, s6, [x9] // Add delayed signal to dry samples fadd s3, s3, s1 // L[i] = dryL + yl fadd s4, s4, s2 // R[i] = dryR + yr str s3, [x1], #4 // write & advance L* str s4, [x2], #4 // write & advance R* // Increment and wrap idx add w6, w6, #1 cmp w6, w5 csel w6[CHUNK 2 OF 2]BUNDLE_0_CORE - PART 2Concatenate all chunks in order to reconstruct., wzr, w6, hs // Next sample add w7, w7, #1 b LloopLstore_idx: // Store updated idx back to struct str w6, [x0, #12]Ldone: // Epilogue – mirror prologue order ldp x27, x28, [sp, #480] ldp q14, q15, [sp, #208] ldp q12, q13, [sp, #176] ldp q10, q11, [sp, #144] ldp q8, q9, [sp, #112] ldp x29, x30, [sp] add sp, sp, #512 ret||ENDFILE||||FILE:asm/active/euclid.s|| .text .align 2 .globl _euclid_pattern_euclid_pattern: // void euclid_pattern(int pulses,int steps,uint8_t *out) stp x29, x30, [sp, #-16]! 
// prologue mov x29, sp mov w3, wzr // bucket = 0 mov w4, wzr // i = 0 (loop counter)1: cmp w4, w1 // while (i < steps) b.ge 2f add w3, w3, w0 // bucket += pulses cmp w3, w1 b.lt 3f sub w3, w3, w1 // bucket -= steps mov w5, #1 // out[i] = 1 b 4f3: mov w5, #0 // out[i] = 04: add x6, x2, x4 // &out[i] strb w5, [x6] add w4, w4, #1 // i++ b 1b2: ldp x29, x30, [sp], #16 // epilogue ret ||ENDFILE||||FILE:asm/active/exp4_ps_asm.s||.text.align 2.globl _exp4_ps_asm// float32x4_t exp4_ps_asm(float32x4_t x)// 4-wide single-precision e^x approximation, ported from fast_math_neon.h.// Identical maths to the C intrinsics version._exp4_ps_asm: // v0 holds input vector x and will carry the final result. // Load constant table base adrp x9, Lexp_const@PAGE add x9, x9, Lexp_const@PAGEOFF // Clamp x to [min_x, max_x] ldr q1, [x9, #0] // max_x ldr q2, [x9, #16] // min_x fmin v3.4s, v0.4s, v1.4s // v3 = min(x,max_x) fmax v0.4s, v3.4s, v2.4s // x = max(v3,min_x) // fx = x * log2e + 0.5 ldr q4, [x9, #32] // log2e fmul v5.4s, v0.4s, v4.4s ldr q6, [x9, #48] // 0.5 fadd v5.4s, v5.4s, v6.4s // Convert to int (truncate toward zero) fcvtzs v7.4s, v5.4s // emm0 // fx = float(emm0) scvtf v8.4s, v7.4s // x -= fx * ln2_hi + fx * ln2_lo ldr q9, [x9, #64] // ln2_hi ldr q10, [x9, #80] // ln2_lo fmul v11.4s, v8.4s, v9.4s fmul v12.4s, v8.4s, v10.4s fsub v0.4s, v0.4s, v11.4s fsub v0.4s, v0.4s, v12.4s // Polynomial approximation fmul v13.4s, v0.4s, v0.4s // x2 = x*x ldr q14, [x9, #96] // c1 ldr q15, [x9, #112] // c2 fmla v14.4s, v0.4s, v15.4s ldr q16, [x9, #128] // c3 fmla v14.4s, v13.4s, v16.4s fmul v17.4s, v13.4s, v0.4s // x2*x ldr q18, [x9, #144] // c4 fmla v14.4s, v17.4s, v18.4s fmul v19.4s, v13.4s, v13.4s // x2*x2 ldr q20, [x9, #160] // c5 fmla v14.4s, v19.4s, v20.4s // y += x fadd v14.4s, v14.4s, v0.4s // y += 1.0f ldr q21, [x9, #176] // 1.0f vector fadd v14.4s, v14.4s, v21.4s // construct 2^n movi v22.4s, #127 // 127 add v7.4s, v7.4s, v22.4s shl v7.4s, v7.4s, #23 mov v23.16b, v7.16b // result = y 
* 2^n fmul v0.4s, v14.4s, v23.4s ret .align 4Lexp_const: // max_x .float 88.3762626647949, 88.3762626647949, 88.3762626647949, 88.3762626647949 // min_x .float -88.3762626647949, -88.3762626647949, -88.3762626647949, -88.3762626647949 // log2e .float 1.44269504088896341, 1.44269504088896341, 1.44269504088896341, 1.44269504088896341 // 0.5 .float 0.5, 0.5, 0.5, 0.5 // ln2_hi .float 0.693359375, 0.693359375, 0.693359375, 0.693359375 // ln2_lo .float -2.12194440e-4, -2.12194440e-4, -2.12194440e-4, -2.12194440e-4 // c1 .float 1.9875691500E-4, 1.9875691500E-4, 1.9875691500E-4, 1.9875691500E-4 // c2 .float 1.3981999507E-3, 1.3981999507E-3, 1.3981999507E-3, 1.3981999507E-3 // c3 .float 8.3334519073E-3, 8.3334519073E-3, 8.3334519073E-3, 8.3334519073E-3 // c4 .float 4.1665795894E-2, 4.1665795894E-2, 4.1665795894E-2, 4.1665795894E-2 // c5 .float 0.16666665459, 0.16666665459, 0.16666665459, 0.16666665459 // 1.0 .float 1.0, 1.0, 1.0, 1.0 ||ENDFILE||||FILE:asm/active/fm_voice.s|| .section __TEXT,__const .align 2.L_tau: .float 6.283185307.L_pi: .float 3.14159265.L_six: .float 6.0.L_onehundredtwenty: .float 120.0 .text .align 2 .globl _fm_voice_process// fm_voice_process(fm_voice_t *v, float32_t *L, float32_t *R, uint32_t n)// Struct offsets: sr=0, carrier_freq=4, ratio=8, index0=12, amp=16, decay=20// len=24, pos=28, carrier_phase=32, mod_phase=36_fm_voice_process: // x0 = fm_voice_t *v, x1 = L, x2 = R, x3 = n // Early exit if pos >= len ldr w4, [x0, #28] // v->pos ldr w5, [x0, #24] // v->len cmp w4, w5 b.ge .fm_exit // Load parameters ldr s16, [x0, #0] // v->sr ldr s17, [x0, #4] // v->carrier_freq ldr s18, [x0, #8] // v->ratio ldr s19, [x0, #12] // v->index0 ldr s20, [x0, #16] // v->amp ldr s21, [x0, #20] // v->decay ldr s22, [x0, #32] // v->carrier_phase ldr s23, [x0, #36] // v->mod_phase // Calculate increments: c_inc = TAU * carrier_freq / sr adrp x7, .L_tau@PAGE add x7, x7, .L_tau@PAGEOFF ldr s0, [x7] // Load TAU fmul s24, s0, s17 // TAU * carrier_freq fdiv s24, s24, s16 // 
c_inc = TAU * carrier_freq / sr // m_inc = TAU * carrier_freq * ratio / sr fmul s25, s24, s18 // m_inc = c_inc * ratio mov w6, #0 // i = 0 .fm_loop: cmp w6, w3 // i < n? b.ge .fm_loop_end // Check if pos >= len ldr w4, [x0, #28] // v->pos ldr w5, [x0, #24] // v->len cmp w4, w5 b.ge .fm_loop_end // Calculate envelope: t = pos / sr ucvtf s26, w4 // convert pos to float fdiv s26, s26, s16 // t = pos / sr // Simple exponential decay approximation: env ≈ 1.0 / (1.0 + decay * t) fmul s27, s21, s26 // decay * t fmov s28, #1.0 fadd s27, s28, s27 // 1.0 + decay * t fdiv s27, s28, s27 // env = 1.0 / (1.0 + decay * t) // Index with envelope: index = index0 * env fmul s29, s19, s27 // index = index0 * env // FM synthesis: sin(carrier_phase + index * sin(mod_phase)) // Use polynomial approximation for sine waves // Step 1: Calculate sin(mod_phase) using polynomial approximation // Normalize mod_phase to [-π, π] range adrp x7, .L_pi@PAGE add x7, x7, .L_pi@PAGEOFF ldr s30, [x7] // Load PI // Wrap mod_phase to [-π, π] fmov s31, s23 // s31 = mod_phase fcmp s31, s30 // compare with π b.le .fm_mod_no_wrap_pos fsub s31, s31, s30 // mod_phase - π fsub s31, s31, s30 // mod_phase - 2π.fm_mod_no_wrap_pos: fneg s0, s30 // -π fcmp s31, s0 // compare with -π b.ge .fm_mod_wrapped fadd s31, s31, s30 // mod_phase + π fadd s31, s31, s30 // mod_phase + 2π.fm_mod_wrapped: // Use polynomial sine approximation instead of libm for stability // sin(x) ≈ x - x³/6 + x⁵/120 (higher order for better accuracy) fmul s0, s31, s31 // x² fmul s1, s0, s31 // x³ fmul s2, s0, s0 // x⁴ fmul s3, s2, s31 // x⁵ // Calculate x³/6 adrp x7, .L_six@PAGE add x7, x7, .L_six@PAGEOFF ldr s4, [x7] fdiv s1, s1, s4 // x³/6 // Calculate x⁵/120 adrp x7, .L_onehundredtwenty@PAGE add x7, x7, .L_onehundredtwenty@PAGEOFF ldr s4, [x7] fdiv s3, s3, s4 // x⁵/120 // Combine: x - x³/6 + x⁵/120 fsub s31, s31, s1 // x - x³/6 fadd s31, s31, s3 // x - x³/6 + x⁵/120 // Step 2: Apply modulation index: index * sin(mod_phase) fmul s31, s29, s31 
// index * sin(mod_phase) // Clamp modulation to prevent instability: limit to [-3.0, 3.0] fmov s0, #3.0 fcmp s31, s0 b.le .fm_mod_clamp_pos_ok fmov s31, s0 // clamp to +3.0.fm_mod_clamp_pos_ok: fneg s0, s0 // -3.0 fcmp s31, s0 b.ge .fm_mod_clamp_neg_ok fmov s31, s0 // clamp to -3.0.fm_mod_clamp_neg_ok: // Step 3: Add to carrier phase: carrier_phase + index * sin(mod_phase) fadd s31, s22, s31 // carrier_phase + index * sin(mod_phase) // Step 4: Calculate final sine: sin(carrier_phase + index * sin(mod_phase)) // Wrap result to [-π, π] range fcmp s31, s30 // compare with π b.le .fm_carr_no_wrap_pos fsub s31, s31, s30 // result - π fsub s31, s31, s30 // result - 2π.fm_carr_no_wrap_pos: fneg s0, s30 // -π fcmp s31, s0 // compare with -π b.ge .fm_carr_wrapped fadd s31, s31, s30 // result + π fadd s31, s31, s30 // result + 2π.fm_carr_wrapped: // Use higher-order polynomial sine approximation // sin(x) ≈ x - x³/6 + x⁵/120 (better accuracy than simple version) fmul s0, s31, s31 // x² fmul s1, s0, s31 // x³ fmul s2, s0, s0 // x⁴ fmul s3, s2, s31 // x⁵ // Calculate x³/6 adrp x7, .L_six@PAGE add x7, x7, .L_six@PAGEOFF ldr s4, [x7] fdiv s1, s1, s4 // x³/6 // Calculate x⁵/120 adrp x7, .L_onehundredtwenty@PAGE add x7, x7, .L_onehundredtwenty@PAGEOFF ldr s4, [x7] fdiv s3, s3, s4 // x⁵/120 // Combine: x - x³/6 + x⁵/120 fsub s31, s31, s1 // x - x³/6 fadd s31, s31, s3 // x - x³/6 + x⁵/120 // s31 now contains the final FM synthesis result // Apply envelope and amplitude (with scaling to prevent clipping) fmul s31, s31, s27 // apply envelope fmul s31, s31, s20 // apply amplitude fmov s0, #0.25 // Scale down to prevent clipping from FM harmonics fmul s31, s31, s0 // final scaling // Final safety clamp to prevent amplitude spikes: limit to [-1.0, 1.0] fmov s0, #1.0 fcmp s31, s0 b.le .fm_out_clamp_pos_ok fmov s31, s0 // clamp to +1.0.fm_out_clamp_pos_ok: fneg s0, s0 // -1.0 fcmp s31, s0 b.ge .fm_out_clamp_neg_ok fmov s31, s0 // clamp to -1.0.fm_out_clamp_neg_ok: // Add to output buffers 
ldr s0, [x1, x6, lsl #2] // L[i] fadd s0, s0, s31 // L[i] += sample str s0, [x1, x6, lsl #2] // store L[i] ldr s0, [x2, x6, lsl #2] // R[i] fadd s0, s0, s31 // R[i] += sample str s0, [x2, x6, lsl #2] // store R[i] // Update phases fadd s22, s22, s24 // carrier_phase += c_inc fadd s23, s23, s25 // mod_phase += m_inc // Keep phases in range [0, TAU] with proper modulo wrapping adrp x7, .L_tau@PAGE add x7, x7, .L_tau@PAGEOFF ldr s0, [x7] // Load TAU // Carrier phase proper modulo: phase = phase - TAU * round(phase / TAU) fdiv s1, s22, s0 // s1 = carrier_phase / TAU frinta s1, s1 // s1 = round(carrier_phase / TAU) fmul s1, s1, s0 // s1 = round(carrier_phase / TAU) * TAU fsub s22, s22, s1 // carrier_phase = carrier_phase - round_part // Modulator phase proper modulo: phase = phase - TAU * round(phase / TAU) fdiv s2, s23, s0 // s2 = mod_phase / TAU frinta s2, s2 // s2 = round(mod_phase / TAU) fmul s2, s2, s0 // s2 = round(mod_phase / TAU) * TAU fsub s23, s23, s2 // mod_phase = mod_phase - round_part // Increment counters add w6, w6, #1 // i++ add w4, w4, #1 // pos++ str w4, [x0, #28] // store v->pos b .fm_loop.fm_loop_end: // Store updated phases str s22, [x0, #32] // v->carrier_phase str s23, [x0, #36] // v->mod_phase.fm_exit: ret||ENDFILE||
at txn 0xc258b892b2f63f0ba38ce4aee3d40c380eb273d3b197675d3927e2888e9182c0 Aug-20-2025 12:40:47 AM UTC (28 days ago)

=:THOR.RUJI:thor1tmpduvq480lu235uns2s589dqzdpafgs5dj2mf:0/1/0:va:0
at txn 0x58b688660bdeb3757518357fa131bf7b56e9741a2860978191199723c250a4c4 Aug-20-2025 12:31:35 AM UTC (28 days ago)

=:THOR.RUJI:thor1tmpduvq480lu235uns2s589dqzdpafgs5dj2mf:0/1/0:va:0
at txn 0x01efbd79b1860ce28fd070b91dd43d16da71d27b8f5cda4c574a0506f7a16d70 Aug-20-2025 12:27:23 AM UTC (28 days ago)

=:b:3FMKaWhgLGkMKyLseuPukoZtFHr3tHarug:0/1/0:zengo:200
at txn 0x03fbfbca259817354e845052ea1e81c702efadb78049afd7b274a8de5217d331 Aug-20-2025 12:26:23 AM UTC (28 days ago)

Ignore
at txn 0xf4090c402c7f81ea6fc3dd3167c02d4a60f7ffb8707f1b214142f70c590ff4d6 Aug-20-2025 12:11:47 AM UTC (28 days ago)

DC-L5:A71w7GgJrJ47uldVcjooTxUHOIrQ/Z9ygFcKO73zZB0=
at txn 0xe3a13a53d31451f66c9c02ca34a30e8ec7835af66b6df2824edadc9597b7113f Aug-20-2025 12:10:59 AM UTC (28 days ago)

Ignore
at txn 0xf99e21ba851d119e07d9837b47cb7face831f474e5279d3dfda610bbe1484137 Aug-20-2025 12:05:23 AM UTC (28 days ago)

cs
at txn 0x33aefde8cba2e6eff888035cea7e5ffedd3f23b965cc4bcd540333e8c9338f29 Aug-20-2025 12:04:47 AM UTC (28 days ago)

Ignore
at txn 0xa5dd4b7e5590dca88930c0879cab627058a75fd6de667d8e92c9a555e2cbf004 Aug-20-2025 12:03:47 AM UTC (28 days ago)

Verification: Requesting verification for 0x4D51573Db98693561B2c97722aEecADA267A9345 with no branding text on etherscan
at txn 0x91c4ef8cee438aa89d6bc8a36739a6d7656cd5344ff1554a7ba49f6609a63d4d Aug-20-2025 12:01:23 AM UTC (28 days ago)

m6#
at txn 0x91b0fefa63a4aad6a30fde4268600da2c2af1e2205f4d98325e71f9aaf7be208 Aug-19-2025 11:58:35 PM UTC (28 days ago)

{"BlockHash":"0x6060edb39d98e5ea02583d79074bf873fc9e810354ee8c51693392a68ce3ae96","Sender":"5GCc8penY3wGsmaq8ZgeTW7TgEfN76tZGvAnUZ2ZqmMbPWqj","Nonce":3891,"Commitment":"0x841c4dbb154e556776700ffbb61891cd44d8310ae76494ae4e774788dd2e7122"}
at txn 0xd24b5f4acc875659897e9c07696554fd66b8bc503ee8bde0ad04292f501ec32a Aug-19-2025 11:55:11 PM UTC (28 days ago)

Ignore
at txn 0x934461ddba299500f37b137da928099ba67a2d6386d0229442282bd845d18866 Aug-19-2025 11:52:11 PM UTC (28 days ago)

Ignore
at txn 0xfa6309e57bcc93d6daacf1878ad244cb8144381357c069ac46fbebf4e82eb437 Aug-19-2025 11:51:59 PM UTC (28 days ago)

Ignore
at txn 0x4493895ad802dfcfd47ca069445be4db0774adf9f15bf582c15223626e2c2191 Aug-19-2025 11:51:47 PM UTC (28 days ago)

BNB
at txn 0x17965088266dde297d9c08a796058b2705448d712f775b329c82ae46a4485d1d Aug-19-2025 11:44:47 PM UTC (28 days ago)

------BEGIN MEMO------eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6IjllZDAyN2YzLThhZGQtNDY5NC04MTI4LWQzNTNlMTRkMTUyNSIsIm9yZGVySWRlbnRpZmllciI6ImI5ZTQ5NDlkLTNhYzQtNDg3Mi1hYTU2LWNmZGJlN2RhNDBjNyIsImlhdCI6MTc1NTY0NjY5Mn0.utqlnjY-17TKOavHMmIzpH0QQvr2nVcyILFSzk-65o8------END MEMO------
at txn 0x439c86f66c8a5dd4acb692e5e92c4ad118a79688e0b69bc2df53962459168b13 Aug-19-2025 11:38:59 PM UTC (28 days ago)

EM
at txn 0xcc250adf8df2ff05a33124bd13a7a9528ed85d340597fdb99396dca3b3ff97e7 Aug-19-2025 11:34:23 PM UTC (28 days ago)

Ignore
at txn 0xb455af1d2e0f92608a049f531466237090658b1f22c022e1dfa4be655c305781 Aug-19-2025 11:32:23 PM UTC (28 days ago)

Ignore
at txn 0xfbb6bc1cfcb8e5cb89479088ccc7ba967962a3738689b17b64a5ed3125bde90e Aug-19-2025 11:31:47 PM UTC (28 days ago)

------BEGIN MEMO------eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJub25jZSI6IjEzZTY0Njc3LTkzNTctNDEwYy05Y2I5LWQ4ODhhYWQ4NTA0MiIsIm9yZGVySWRlbnRpZmllciI6ImJjZDFkZTQyLTk3MzUtNDVkMS04NTNmLTc0ODZjMzAyYzMwMSIsImlhdCI6MTc1NTY0NTM2Nn0.7gFODtJ7z8kmexUgsriU-gwzvG6twhI-UYIl59dS87E------END MEMO------
at txn 0xa3a668847655eaaea03641221f54ae970c959463b77357184f0f2852c5cfdb60 Aug-19-2025 11:16:23 PM UTC (28 days ago)