1REM "*********************************************
2REM "*** FracNEON V0.3 (09/03/2016) ***
3REM "*** NEON Fractal by Michael Kübel ***
4REM "*** e-mail: michael.kuebel@googlemail.com ***
5REM "*********************************************
6REM Plots a Fractal_size% x Fractal_size% Mandelbrot region straight into 32bpp screen memory with NEON (4 pixels per pass), then prints time and iteration statistics.
7ON ERROR REPORT: PRINT ERL: A$=GET$: END
8REM Code buffer for the assembler; L% is set to the address just past it and is used as the limit when OPT has bit 3 set (pass% 8 and 10).
9DIM code_neon 2048,L%-1
10
11REM check screen mode
12DIM v_in% 32
13DIM v_out% 32
14v_in%!0=148 : REM screen start
15v_in%!4= 11 : REM width
16v_in%!8= 12 : REM height
17v_in%!12= 3 : REM colours
18v_in%!16=-1
19SYS "OS_ReadVduVariables",v_in%,v_out%
20IF (v_out%!8) < 599 THEN PRINT "Screen height must be >= 600":END
21IF (v_out%!12) <> -1 THEN PRINT "Screen mode must be 32bpp":END
22Screen_x_max%=v_out%!4+1
23Screen_y_max%=v_out%!8+1
24Screen_Start_Address=v_out%!0
25
26Fractal_size%=600
27IF Screen_x_max% < Fractal_size% THEN PRINT "Screen width must be >= 600":END
28REM Get VFP context
29SYS "VFPSupport_CreateContext",&80000001,32,0,0 TO ctx%,prev%
30ON ERROR SYS "XOS_Byte",112,0 : SYS "XOS_Byte",113,0 : SYS "XOS_RestoreCursors" : SYS "XVFPSupport_DestroyContext",ctx%,prev% : PRINT REPORT$;" at ";ERL : END
31
32PROCAssemble_neon
33OFF:CLG
34CALL code_neon
35PRINT"---------------------"
36PRINT"NEON Single Precision"
37PRINT"Fractal by M.Kuebel"
38PRINT"---------------------"
39PRINT"Time [s].....................: ";code_neon!4/100
40PRINT"Iterations...................: ";code_neon!8
41IF code_neon!4>0 THEN PRINT"Million Iterations per second: ";INT(code_neon!8/1000000/(code_neon!4/100)*1000)/1000 ELSE PRINT"Million Iterations per second: n/a"
42A$=GET$
43ON:REM show the text cursor again (it was hidden by OFF before the plot)
44SYS "VFPSupport_DestroyContext",ctx%,prev%
45
46END
47REM Assembles the fractal routine into code_neon. Word code_neon!4 receives the elapsed time in centiseconds, word code_neon!8 the total iteration count.
48DEF PROCAssemble_neon
49FOR pass%=8 TO 10 STEP 2
50P%=code_neon
51[
52OPT pass%
53B code_starts_here
54;-------------"Put some variables at start for later easy READ from Basic"---
55.timer
56DCD 0
57.iteration_counter
58DCD 0
59;-------------
60.code_starts_here
61STMFD R13!,{R0-R12,R14}
62  ; Core registers - R4 global iteration total, R5 address of current screen row, R6-R9 x of the pixel held in lanes 1-4, R10 highest x started so far, R11 end-of-line lane flags, R12 iter_max-1
63  ;---------Calculate Mandelbrot Fractal in Single Precision
64  ; NEON registers - Q0 x, Q1 y, Q2 remaining-iteration counters, Q4 a, Q5 b, Q14 counter decrement, Q15 escape limit 4.0
65  SWI "OS_ReadMonotonicTime"
66  STR R0,timer                ;save timer
67
68  MOV R0,#(Fractal_size%)
69  VLDR S0,ro_sp
70  VLDR S1,ru_sp
71  VMOV S2,R0
72  VCVT.F32.U32 S2,S2
73  VSUB.F32 S0,S0,S1
74  VDIV.F32 S0,S0,S2
75  VSTR S0,delta_sp            ;delta = (ro-ru)/fractal_size
76
77  VLDR S0,iter_out_sp
78  VDUP.32 Q15,D0[0]           ;limit = 4.0 for each pixel
79
80  LDR R12,iter_max
81  SUB R12,R12,#1              ;init maximum iterations - 1
82
83  MOV R4,#0                   ;init global iteration counter
84
85  MOV R0,#0
86  STR R0,y_counter            ;init y_counter
87
88  .y_loop_sp
89
90  LDR R2,y_counter
91
92  VLDR S0,io_sp
93  VLDR S1,delta_sp
94  VMOV S2,R2
95  VCVT.F32.U32 S2,S2
96  VMUL.F32 S2,S2,S1
97  VSUB.F32 S0,S0,S2           ;b0 = io - current_y * delta
98  VSTR S0,io_save_sp          ;save value for easy access later
99  VDUP.32 Q5,D0[0]            ;save b0,b1,b2,b3 in Q5
100
101  MOV R6 ,#0                 ;starting line counter pixel 1
102  MOV R7, #1                 ;starting line counter pixel 2
103  MOV R8, #2                 ;starting line counter pixel 3
104  MOV R9, #3                 ;starting line counter pixel 4
105  MOV R10,R9                 ;init line counter global
106
107  MOV R0,#0                  ;Init all 4 first pixel of a line
108  VLDR S4,ru_sp
109  VLDR S5,delta_sp
110  VDUP.32 Q0,D2[0]           ;Q0 = ru ru ru ru
111  VDUP.32 Q1,D2[1]           ;Q1 = d d d d
112  VMOV S8, R0
113  ADD R0, R0,#1
114  VMOV S9, R0
115  ADD R0, R0,#1
116  VMOV S10,R0
117  ADD R0, R0,#1
118  VMOV S11,R0
119  VCVT.F32.U32 Q2,Q2         ;Q2 = x x+1 x+2 x+3
120  VMUL.F32 Q1,Q1,Q2          ;Q1 = d*x d*(x+1) d*(x+2) d*(x+3)
121  VADD.F32 Q0,Q0,Q1          ;Q0 = ru+d*x ru+d*(x+1) ru+d*(x+2) ru+d*(x+3)
122  ;x = a
123
124  VMOV Q4,Q0                 ;save a0,a1,a2,a3 in Q4
125  VMOV Q1,Q5                 ;y = b
126
127  VMOV.I32 Q14,#4            ;init sub value = 4 for each pixel
128  VDUP.32 Q2, R12            ;init single iteration counters = max_iter-1
129
130  LDR R0,Screen_Start        ;Screen Start address
131  LDR R1,Hpixels             ;Screen Width
132  MOV R1,R1,LSL#2            ;4 Bytes per Pixel
133  MLA R5,R1,R2,R0            ;Line position = Screen_Start + HPixels * y
134
135  MOV R11,#0                 ;init end of line flags for each pixel
136  ; Four iterations are unrolled per pass. x*x+y*y of iteration 1,2,3,4 is kept in Q10,Q11,Q6,Q7 so the exact escape iteration can be worked out afterwards
137  .iteration_loop
138  ;1.Iteration
139  VMUL.F32 Q3, Q0, Q0        ;x*x
140  VMUL.F32 Q8, Q1, Q1        ;y*y
141  VMUL.F32 Q12,Q0, Q1        ;x*y
142  VADD.F32 Q10,Q3, Q8        ;x*x + y*y
143  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
144  VADD.F32 Q12,Q12,Q12       ;2*x*y
145  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
146  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
147
148  ;2.Iteration
149  VMUL.F32 Q3, Q0, Q0        ;x*x
150  VMUL.F32 Q8, Q1, Q1        ;y*y
151  VMUL.F32 Q12,Q0, Q1        ;x*y
152  VADD.F32 Q11,Q3, Q8        ;x*x + y*y
153  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
154  VADD.F32 Q12,Q12,Q12       ;2*x*y
155  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
156  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
157
158  ;3.Iteration
159  VMUL.F32 Q3, Q0, Q0        ;x*x
160  VMUL.F32 Q8, Q1, Q1        ;y*y
161  VMUL.F32 Q12,Q0, Q1        ;x*y
162  VADD.F32 Q6, Q3, Q8        ;x*x + y*y
163  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
164  VADD.F32 Q12,Q12,Q12       ;2*x*y
165  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
166  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
167
168  ;4.Iteration
169  VMUL.F32 Q3, Q0, Q0        ;x*x
170  VMUL.F32 Q8, Q1, Q1        ;y*y
171  VMUL.F32 Q12,Q0, Q1        ;x*y
172  VADD.F32 Q7, Q3, Q8        ;x*x + y*y
173  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
174  VADD.F32 Q12,Q12,Q12       ;2*x*y
175  VCGT.F32 Q3, Q7, Q15       ;> 4 ?
176  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
177  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
178
179  VSUB.U32 Q2, Q2, Q14       ;decrement pixel counters
180
181  VORR D26,D6, D7            ;check if Q3>4 without changing Q3
182  VORR D27,D4, D5            ;check if Q2<0 without changing Q2
183  VMOV R0, R1, D26
184  VMOV R2, R3, D27
185  ORRS R0, R0, R1
186  BMI diverged_1234          ;1 or more pixel diverged
187  ORRS R2, R2, R3
188  BPL iteration_loop
189  BMI max_iterations_1234    ;1 or more pixel reached max iter
190  ; Per escaped lane - bump its counter once for each earlier iteration of this pass that had already escaped, plot the pixel, then load the next x into that lane
191  ;############ Pixel 1 Diverge Handling ############
192
193  .diverged_1234
194  MVN R3,#0
195  VMOV R0,S12                ;check if pixel 1 diverged (Q3.1 -> S12)
196  TST R0,R3
197  BEQ diverged_234           ;not ? -> check Pixel 2,3,4
198
199  VMOV Q13,Q3                ;save 4th iteration compare results
200  VMOV R0,S8                 ;get iterations pixel 1 (Q2.1 -> S8)
201  VCGT.F32 Q3, Q6, Q15       ;> 4 ? Diverged at 3rd ?
202  VMOV R1,S12
203  TST R1,R3
204  ADDNE R0,R0,#1
205  VCGT.F32 Q3, Q11,Q15       ;> 4 ? Diverged at 2nd ?
206  VMOV R1,S12
207  TST R1,R3
208  ADDNE R0,R0,#1
209  VCGT.F32 Q3, Q10,Q15       ;> 4 ? Diverged at 1st ?
210  VMOV R1,S12
211  TST R1,R3
212  ADDNE R0,R0,#1
213  VMOV Q3,Q13                ;restore 4th iteration compare results
214
215  SUB R2,R12,R0              ;correct counter
216  ADD R4,R4, R2              ;add to global iteration counter
217  MOV R2,R2,LSL#11           ;some colour
218  STR R2,[R5,R6,LSL#2]       ;write pixel on screen
219  ADD R10,R10,#1             ;increase x
220  CMP R10,#(Fractal_size%)   ;end of line reached ?
221
222  ORRGE R11,R11,#%0001       ;set end of line flags for pixel 1
223  BGE diverged_234           ;but still check if also another diverged
224
225  VMOV S8,R12                ;transfer counter init to Q2.1 -> S8
226  MOV R6,R10                 ;save new x in new plot_x1
227  VLDR S0,delta_sp           ;use S0 as not needed anymore
228  VMOV S4,R10                ;use S4 for R10, as not needed anymore
229  VCVT.F32.U32 S4,S4
230  VMUL.F32 S0,S0,S4          ;new x1 = x1*d
231  VLDR S4,ru_sp              ;use S4 as not needed anymore
232  VADD.F32 S0,S0,S4          ;new x1 = ru + x1*d
233  VMOV.F32 S16,S0            ;new x1 also = new 'a'
234  VLDR S4,io_save_sp         ;get saved y4=y3=y2=y1
235
236  ;just continue checking pixel 2
237
238  ;############ Pixel 2 Diverge Handling ############
239
240  .diverged_234
241  MVN R3,#0
242  VMOV R0,S13                ;check if pixel 2 diverged (Q3.2 -> S13)
243  TST R0,R3
244  BEQ diverged_34            ;not ? -> check Pixel 3,4
245
246  VMOV Q13,Q3                ;save 4th iteration compare results
247  VMOV R0,S9                 ;get iterations pixel 2 (Q2.2 -> S9)
248  VCGT.F32 Q3, Q6, Q15       ;> 4 ? Diverged at 3rd ?
249  VMOV R1,S13
250  TST R1,R3
251  ADDNE R0,R0,#1
252  VCGT.F32 Q3, Q11,Q15       ;> 4 ? Diverged at 2nd ?
253  VMOV R1,S13
254  TST R1,R3
255  ADDNE R0,R0,#1
256  VCGT.F32 Q3, Q10,Q15       ;> 4 ? Diverged at 1st ?
257  VMOV R1,S13
258  TST R1,R3
259  ADDNE R0,R0,#1
260  VMOV Q3,Q13                ;restore 4th iteration compare results
261
262  SUB R2,R12,R0              ;correct counter
263  ADD R4,R4,R2               ;add to global iteration counter
264  MOV R2,R2,LSL#11           ;some colour
265  STR R2,[R5,R7,LSL#2]       ;write pixel on screen
266  ADD R10,R10,#1             ;increase x
267  CMP R10,#(Fractal_size%)   ;end of line reached ?
268
269  ORRGE R11,R11,#%0010       ;set end of line flags for pixel 2
270  BGE diverged_34            ;but still check if also another diverged
271
272  VMOV S9,R12                ;transfer to Q2.2 -> S9
273  MOV R7,R10                 ;save new x in new plot_x2
274  VLDR S1,delta_sp           ;use S1 as not needed anymore
275  VMOV S5,R10                ;use S5 for R10, as not needed anymore
276  VCVT.F32.U32 S5,S5
277  VMUL.F32 S1,S1,S5          ;new x2 = x2*d
278  VLDR S5,ru_sp              ;use S5 as not needed anymore
279  VADD.F32 S1,S1,S5          ;new x2 = ru + x2*d
280  VMOV.F32 S17,S1            ;new x2 also = new 'a'
281  VLDR S5,io_save_sp         ;get saved y4=y3=y2=y1
282
283  ;just continue checking pixel 3
284
285  ;############ Pixel 3 Diverge Handling ############
286
287  .diverged_34
288  MVN R3,#0
289  VMOV R0,S14                ;check if pixel 3 diverged (Q3.3 -> S14)
290  TST R0,R3
291  BEQ diverged_4             ;not ? -> check Pixel 4
292
293  VMOV Q13,Q3                ;save 4th iteration compare results
294  VMOV R0,S10                ;get iterations pixel 3 (Q2.3 -> S10)
295  VCGT.F32 Q3, Q6, Q15       ;> 4 ? Diverged at 3rd ?
296  VMOV R1,S14
297  TST R1,R3
298  ADDNE R0,R0,#1
299  VCGT.F32 Q3, Q11,Q15       ;> 4 ? Diverged at 2nd ?
300  VMOV R1,S14
301  TST R1,R3
302  ADDNE R0,R0,#1
303  VCGT.F32 Q3, Q10,Q15       ;> 4 ? Diverged at 1st ?
304  VMOV R1,S14
305  TST R1,R3
306  ADDNE R0,R0,#1
307  VMOV Q3,Q13                ;restore 4th iteration compare results
308
309  SUB R2,R12,R0              ;correct counter
310  ADD R4,R4, R2              ;add to global iteration counter
311  MOV R2,R2,LSL#11           ;some colour
312  STR R2,[R5,R8,LSL#2]       ;write pixel on screen
313  ADD R10,R10,#1             ;increase x
314  CMP R10,#(Fractal_size%)   ;end of line reached ?
315
316  ORRGE R11,R11,#%0100       ;set end of line flags for pixel 3
317  BGE diverged_4             ;but still check if also another diverged
318
319  VMOV S10,R12               ;transfer to Q2.3 -> S10
320  MOV R8, R10                ;save new x in new plot_x3
321  VLDR S2,delta_sp           ;use S2 as not needed anymore
322  VMOV S6,R10                ;use S6 for R10, as not needed anymore
323  VCVT.F32.U32 S6,S6
324  VMUL.F32 S2,S2,S6          ;new x3 = x3*d
325  VLDR S6,ru_sp              ;use S6 as not needed anymore
326  VADD.F32 S2,S2,S6          ;new x3 = ru + x3*d
327  VMOV.F32 S18,S2            ;new x3 also = new 'a'
328  VLDR S6,io_save_sp         ;get saved y4=y3=y2=y1
329
330  ;just continue checking pixel 4
331
332  B diverged_4
333  ; VLDR and VSTR reach only about 1KB either side of the PC, so these constants sit mid-routine and are branched over
334  ;############ Data Mandelbrot Fractal placed here for offset limit reason
335  ;placed here because of alignment
336  .ro_sp DCFS -0.1450
337  .ru_sp DCFS -0.1750
338  .io_sp DCFS -1.02
339  .delta_sp DCFS 0.0         ;dummy_value
340  .io_save_sp DCFS 0.0       ;dummy_value
341  .iter_out_sp DCFS 4.0
342
343  ;############ Pixel 4 Diverge Handling ############
344
345  .diverged_4
346  MVN R3,#0
347  VMOV R0,S15                ;check if pixel 4 diverged (Q3.4 -> S15)
348  TST R0,R3
349  BEQ end_diverge_check      ;not ? -> continue
350
351  VMOV Q13,Q3                ;save 4th iteration compare results
352  VMOV R0,S11                ;get iterations pixel 4 (Q2.4 -> S11)
353  VCGT.F32 Q3, Q6, Q15       ;> 4 ? Diverged at 3rd ?
354  VMOV R1,S15
355  TST R1,R3
356  ADDNE R0,R0,#1
357  VCGT.F32 Q3, Q11,Q15       ;> 4 ? Diverged at 2nd ?
358  VMOV R1,S15
359  TST R1,R3
360  ADDNE R0,R0,#1
361  VCGT.F32 Q3, Q10,Q15       ;> 4 ? Diverged at 1st ?
362  VMOV R1,S15
363  TST R1,R3
364  ADDNE R0,R0,#1
365  VMOV Q3,Q13                ;restore 4th iteration compare results
366
367  SUB R2,R12,R0              ;correct counter
368  ADD R4,R4, R2              ;add to global iteration counter
369  MOV R2,R2,LSL#11           ;some colour
370  STR R2,[R5,R9,LSL#2]       ;write pixel on screen
371  ADD R10,R10,#1             ;increase x
372  CMP R10,#(Fractal_size%)   ;end of line reached ?
373
374  ORRGE R11,R11,#%1000       ;set end of line flags for pixel 4
375  BGE end_diverge_check      ;but still check also end of iterations
376
377  VMOV S11,R12               ;transfer to Q2.4 -> S11
378  MOV R9, R10                ;save new x in new plot_x4
379  VLDR S3,delta_sp           ;use S3 as not needed anymore
380  VMOV S7,R10                ;use S7 for R10, as not needed anymore
381  VCVT.F32.U32 S7,S7
382  VMUL.F32 S3,S3,S7          ;new x4 = x4*d
383  VLDR S7,ru_sp              ;use S7 as not needed anymore
384  VADD.F32 S3,S3,S7          ;new x4 = ru + x4*d
385  VMOV.F32 S19,S3            ;new x4 also = new 'a'
386  VLDR S7,io_save_sp         ;get saved y4=y3=y2=y1
387
388  .end_diverge_check
389
390  VORR D27,D4, D5            ;check if Q2<0 without changing Q2
391  VMOV R2, R3, D27
392  ORRS R2, R2, R3
393  BMI max_iterations_1234    ;1 or more pixel reached also max iter
394
395  CMP R11,#0
396  BEQ iteration_loop         ;if not, was end of line reached ?
397  ; A lane whose counter went negative used up all its iterations. It is counted but not plotted, and the lane is reloaded with the next x
398  ;############ Pixel 1 Max Iterations Handling ############
399
400  .max_iterations_1234
401  VMOV R0,S8                 ;check if pixel 1 reach max iter (Q2.1 -> S8)
402  TST R0,#&80000000          ;check sign bit
403  BEQ max_iterations_234     ;not ? -> check Pixel 2,3,4
404
405  ADD R4,R4,R12
406  ADD R4,R4,#1               ;add to global iteration counter
407  ADD R10,R10,#1             ;increase x
408  CMP R10,#(Fractal_size%)   ;end of line reached ?
409
410  ORRGE R11,R11,#%0001       ;set end of line flags for pixel 1
411  BGE max_iterations_234     ;but still check also others
412
413  VMOV S8,R12                ;transfer to Q2.1 -> S8
414  MOV R6,R10                 ;save new x in new plot_x1
415  VLDR S0,delta_sp           ;use S0 as not needed anymore
416  VMOV S4,R10                ;use S4 for R10, as not needed anymore
417  VCVT.F32.U32 S4,S4
418  VMUL.F32 S0,S0,S4          ;new x1 = x1*d
419  VLDR S4,ru_sp              ;use S4 as not needed anymore
420  VADD.F32 S0,S0,S4          ;new x1 = ru + x1*d
421  VMOV.F32 S16,S0            ;new x1 also = new 'a'
422  VLDR S4,io_save_sp         ;get saved y4=y3=y2=y1
423
424  ;############ Pixel 2 Max Iterations Handling ############
425
426  .max_iterations_234
427  VMOV R0,S9                 ;check if pixel 2 reach max iter (Q2.2 -> S9)
428  TST R0,#&80000000          ;check sign bit
429  BEQ max_iterations_34      ;not ? -> check Pixel 3,4
430
431  ADD R4,R4,R12
432  ADD R4,R4,#1               ;add to global iteration counter
433  ADD R10,R10,#1             ;increase x
434  CMP R10,#(Fractal_size%)   ;end of line reached ?
435
436  ORRGE R11,R11,#%0010       ;set end of line flags for pixel 2
437  BGE max_iterations_34      ;but still check also others
438
439  VMOV S9,R12                ;transfer to Q2.2 -> S9
440  MOV R7,R10                 ;save new x in new plot_x2
441  VLDR S1,delta_sp           ;use S1 as not needed anymore
442  VMOV S5,R10                ;use S5 for R10, as not needed anymore
443  VCVT.F32.U32 S5,S5
444  VMUL.F32 S1,S1,S5          ;new x2 = x2*d
445  VLDR S5,ru_sp              ;use S5 as not needed anymore
446  VADD.F32 S1,S1,S5          ;new x2 = ru + x2*d
447  VMOV.F32 S17,S1            ;new x2 also = new 'a'
448  VLDR S5,io_save_sp         ;get saved y4=y3=y2=y1
449
450  ;############ Pixel 3 Max Iterations Handling ############
451
452  .max_iterations_34
453  VMOV R0,S10                ;check if pixel 3 reach max iter (Q2.3 -> S10)
454  TST R0,#&80000000          ;check sign bit
455  BEQ max_iterations_4       ;not ? -> check Pixel 4
456
457  ADD R4,R4,R12
458  ADD R4,R4,#1               ;add to global iteration counter
459  ADD R10,R10,#1             ;increase x
460  CMP R10,#(Fractal_size%)   ;end of line reached ?
461
462  ORRGE R11,R11,#%0100       ;set end of line flags for pixel 3
463  BGE max_iterations_4       ;but still check also others
464
465  VMOV S10,R12               ;transfer to Q2.3 -> S10
466  MOV R8,R10                 ;save new x in new plot_x3
467  VLDR S2,delta_sp           ;use S2 as not needed anymore
468  VMOV S6,R10                ;use S6 for R10, as not needed anymore
469  VCVT.F32.U32 S6,S6
470  VMUL.F32 S2,S2,S6          ;new x3 = x3*d
471  VLDR S6,ru_sp              ;use S6 as not needed anymore
472  VADD.F32 S2,S2,S6          ;new x3 = ru + x3*d
473  VMOV.F32 S18,S2            ;new x3 also = new 'a'
474  VLDR S6,io_save_sp         ;get saved y4=y3=y2=y1
475
476  ;############ Pixel 4 Max Iterations Handling ############
477
478  .max_iterations_4
479  VMOV R0,S11                ;check if pixel 4 reach max iter (Q2.4 -> S11)
480  TST R0,#&80000000          ;check sign bit
481  BEQ end_max_iter_check     ;not ? -> end max iter check
482
483  ADD R4,R4,R12
484  ADD R4,R4,#1               ;add to global iteration counter
485  ADD R10,R10,#1             ;increase x
486  CMP R10,#(Fractal_size%)   ;end of line reached ?
487
488  ORRGE R11,R11,#%1000       ;set end of line flags for pixel 4
489  BGE end_of_line            ;now all are checked for everything
490
491  VMOV S11,R12               ;transfer to Q2.4 -> S11
492  MOV R9,R10                 ;save new x in new plot_x4
493  VLDR S3,delta_sp           ;use S3 as not needed anymore
494  VMOV S7,R10                ;use S7 for R10, as not needed anymore
495  VCVT.F32.U32 S7,S7
496  VMUL.F32 S3,S3,S7          ;new x4 = x4*d
497  VLDR S7,ru_sp              ;use S7 as not needed anymore
498  VADD.F32 S3,S3,S7          ;new x4 = ru + x4*d
499  VMOV.F32 S19,S3            ;new x4 also = new 'a'
500  VLDR S7,io_save_sp         ;get saved y4=y3=y2=y1
501
502  .end_max_iter_check
503
504  CMP R11,#0
505  BEQ iteration_loop
506  ; No new x is left for at least one lane. Each lane whose flag is still clear is finished on its own, one iteration at a time, from the saved state in Q10,Q11,Q13
507  .end_of_line
508
509  VMOV.I32 Q14,#1            ;change sub value = 1 now for each pixel
510  VMOV Q10,Q0                ;save state of Q0 for other pixels
511  VMOV Q11,Q1                ;save state of Q1 for other pixels
512  VMOV Q13,Q2                ;save state of pixel counter for other pixels
513
514  ;############ End of Line Pixel 1 Handling ############
515  TST R11,#%0001
516  BNE check_end_of_line_pixel_2
517
518  .iterate_pixel_1
519  VMUL.F32 Q3, Q0, Q0        ;x*x
520  VMUL.F32 Q8, Q1, Q1        ;y*y
521  VMUL.F32 Q12,Q0, Q1        ;x*y
522  VADD.F32 Q7, Q3, Q8        ;x*x + y*y
523  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
524  VADD.F32 Q12,Q12,Q12       ;2*x*y
525  VCGT.F32 Q3, Q7, Q15       ;> 4 ?
526  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
527  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
528  VSUB.U32 Q2, Q2, Q14       ;decrement pixel counters
529
530  VMOV R0,S12                ;check if pixel 1 diverged
531  VMOV R1,S8                 ;check if pixel 1 reached max iter
532  MOVS R0,R0
533  BMI end_pixel_1_diverged
534  MOVS R1,R1
535  BPL iterate_pixel_1
536
537  ;maximum iterations reached
538  ADD R4,R4,R12              ;add to global iteration counter
539  ADD R4,R4,#1
540  B check_end_of_line_pixel_2
541
542  .end_pixel_1_diverged
543  VMOV R0,S8                 ;get iterations pixel 1 (Q2.1 -> S8)
544  SUB R2,R12,R0              ;correct counter
545  ADD R4,R4, R2              ;add to global iteration counter
546  MOV R2,R2,LSL#11           ;some colour
547  STR R2,[R5,R6,LSL#2]       ;write pixel on screen
548
549  ;############ End of Line Pixel 2 Handling ############
550  .check_end_of_line_pixel_2
551
552  VMOV Q0,Q10                ;restore state of Q0 for other pixels
553  VMOV Q1,Q11                ;restore state of Q1 for other pixels
554  VMOV Q2,Q13                ;restore state of pixel counter for other pixels
555
556  TST R11,#%0010
557  BNE check_end_of_line_pixel_3
558
559  .iterate_pixel_2
560  VMUL.F32 Q3, Q0, Q0        ;x*x
561  VMUL.F32 Q8, Q1, Q1        ;y*y
562  VMUL.F32 Q12,Q0, Q1        ;x*y
563  VADD.F32 Q7, Q3, Q8        ;x*x + y*y
564  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
565  VADD.F32 Q12,Q12,Q12       ;2*x*y
566  VCGT.F32 Q3, Q7, Q15       ;> 4 ?
567  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
568  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
569  VSUB.U32 Q2, Q2, Q14       ;decrement pixel counters
570
571  VMOV R0,S13                ;check if pixel 2 diverged
572  VMOV R1,S9                 ;check if pixel 2 reached max iter
573  MOVS R0,R0
574  BMI end_pixel_2_diverged
575  MOVS R1,R1
576  BPL iterate_pixel_2
577
578  ;maximum iterations reached
579  ADD R4,R4,R12              ;add to global iteration counter
580  ADD R4,R4,#1
581  B check_end_of_line_pixel_3
582
583  .end_pixel_2_diverged
584  VMOV R0,S9                 ;get iterations pixel 2 (Q2.2 -> S9)
585  SUB R2,R12,R0              ;correct counter
586  ADD R4,R4, R2              ;add to global iteration counter
587  MOV R2,R2,LSL#11           ;some colour
588  STR R2,[R5,R7,LSL#2]       ;write pixel on screen
589
590  ;############ End of Line Pixel 3 Handling ############
591  .check_end_of_line_pixel_3
592
593  VMOV Q0,Q10                ;restore state of Q0 for other pixels
594  VMOV Q1,Q11                ;restore state of Q1 for other pixels
595  VMOV Q2,Q13                ;restore state of pixel counter for other pixels
596
597  TST R11,#%0100
598  BNE check_end_of_line_pixel_4
599
600  .iterate_pixel_3
601  VMUL.F32 Q3, Q0, Q0        ;x*x
602  VMUL.F32 Q8, Q1, Q1        ;y*y
603  VMUL.F32 Q12,Q0, Q1        ;x*y
604  VADD.F32 Q7, Q3, Q8        ;x*x + y*y
605  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
606  VADD.F32 Q12,Q12,Q12       ;2*x*y
607  VCGT.F32 Q3, Q7, Q15       ;> 4 ?
608  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
609  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
610  VSUB.U32 Q2, Q2, Q14       ;decrement pixel counters
611
612  VMOV R0,S14                ;check if pixel 3 diverged
613  VMOV R1,S10                ;check if pixel 3 reached max iter
614  MOVS R0,R0
615  BMI end_pixel_3_diverged
616  MOVS R1,R1
617  BPL iterate_pixel_3
618
619  ;maximum iterations reached
620  ADD R4,R4,R12              ;add to global iteration counter
621  ADD R4,R4,#1
622  B check_end_of_line_pixel_4
623
624  .end_pixel_3_diverged
625  VMOV R0,S10                ;get iterations pixel 3 (Q2.3 -> S10)
626  SUB R2,R12,R0              ;correct counter
627  ADD R4,R4, R2              ;add to global iteration counter
628  MOV R2,R2,LSL#11           ;some colour
629  STR R2,[R5,R8,LSL#2]       ;write pixel on screen
630
631  ;############ End of Line Pixel 4 Handling ############
632  .check_end_of_line_pixel_4
633
634  VMOV Q0,Q10                ;restore state of Q0 for other pixels
635  VMOV Q1,Q11                ;restore state of Q1 for other pixels
636  VMOV Q2,Q13                ;restore state of pixel counter for other pixels
637
638  TST R11,#%1000
639  BNE next_line
640
641  .iterate_pixel_4
642  VMUL.F32 Q3, Q0, Q0        ;x*x
643  VMUL.F32 Q8, Q1, Q1        ;y*y
644  VMUL.F32 Q12,Q0, Q1        ;x*y
645  VADD.F32 Q7, Q3, Q8        ;x*x + y*y
646  VSUB.F32 Q9, Q3, Q8        ;x_new = x*x - y*y
647  VADD.F32 Q12,Q12,Q12       ;2*x*y
648  VCGT.F32 Q3, Q7, Q15       ;> 4 ?
649  VADD.F32 Q0, Q9 ,Q4        ;x_new = x_new + a
650  VADD.F32 Q1, Q12,Q5        ;y_new = y_new + b
651  VSUB.U32 Q2, Q2, Q14       ;decrement pixel counters
652
653  VMOV R0,S15                ;check if pixel 4 diverged
654  VMOV R1,S11                ;check if pixel 4 reached max iter
655  MOVS R0,R0
656  BMI end_pixel_4_diverged
657  MOVS R1,R1
658  BPL iterate_pixel_4
659
660  ;maximum iterations reached
661  ADD R4,R4,R12              ;add to global iteration counter
662  ADD R4,R4,#1
663  B next_line
664
665  .end_pixel_4_diverged
666  VMOV R0,S11                ;get iterations pixel 4 (Q2.4 -> S11)
667  SUB R2,R12,R0              ;correct counter
668  ADD R4,R4, R2              ;add to global iteration counter
669  MOV R2,R2,LSL#11           ;some colour
670  STR R2,[R5,R9,LSL#2]       ;write pixel on screen
671
672  .next_line
673  LDR R0,y_counter
674  ADD R0,R0,#1
675  STR R0,y_counter
676  CMP R0,#(Fractal_size%)
677
678  BNE y_loop_sp
679
680  STR R4,iteration_counter   ;save iteration counter in case R4 used
681
682  SWI "OS_ReadMonotonicTime"
683  LDR R1,timer
684  SUB R0,R0,R1
685  STR R0,timer               ;timer = elapsed centiseconds, read from BASIC as code_neon!4
686
687LDMFD R13!,{R0-R12,PC}
688
689;----------------------------"Variables"-------------------------
690.iter_max
691DCD 4096
692.y_counter
693DCD 0
694.x_counter
695DCD 0
696;---
697.Hpixels
698EQUD Screen_x_max%
699.Screen_Start
700EQUD Screen_Start_Address
701;----------------------------------------------------------------
702]
703NEXT
704ENDPROC