.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI0_0:
  .long 0 ## 0x0
  .long 1 ## 0x1
  .long 2 ## 0x2
  .long 3 ## 0x3
LCPI0_1:
  .long 4 ## 0x4
  .long 5 ## 0x5
  .long 6 ## 0x6
  .long 7 ## 0x7
.section __TEXT,__const
.p2align 5
LCPI0_2:
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
LCPI0_3:
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
.section __TEXT,__text,regular,pure_instructions
.p2align 4, 0x90
_ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf: ## @ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf
## BB#0: ## %allocas
  pushq %rbp
  pushq %r15
  pushq %r14
  pushq %r13
  pushq %r12
  pushq %rbx
  movq 64(%rsp), %r10
  movq 56(%rsp), %r11
  vmovmskps %ymm4, %eax
  vpermilps $0, %xmm3, %xmm3 ## xmm3 = xmm3[0,0,0,0]
  vinsertf128 $1, %xmm3, %ymm3, %ymm3
  vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0]
  vinsertf128 $1, %xmm2, %ymm2, %ymm2
  cmpl $255, %eax
  jne LBB0_1
## BB#12: ## %for_test.preheader
  cmpl %ecx, %edx
  jge LBB0_13
## BB#16: ## %for_loop.lr.ph
  movl %esi, %eax
  subl %edi, %eax
  movl %eax, %ebx
  sarl $31, %ebx
  shrl $29, %ebx
  addl %eax, %ebx
  andl $-8, %ebx
  subl %ebx, %eax
  movl %esi, %r13d
  subl %eax, %r13d
  vmovd %esi, %xmm4
  vpshufd $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0]
  vinsertf128 $1, %xmm4, %ymm4, %ymm10
  vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0]
  vinsertf128 $1, %xmm1, %ymm1, %ymm1
  vpermilps $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  cmpl %edi, %r13d
  jle LBB0_18
## BB#17: ## %foreach_full_body.lr.ph.us.preheader
  movl %r9d, %r15d
  imull %edx, %r15d
  addl %edi, %r15d
  leal (,%r15,4), %r12d
  leal (,%r9,4), %r14d
  vxorps %ymm7, %ymm7, %ymm7
  vmovdqa LCPI0_0(%rip), %xmm8 ## xmm8 = [0,1,2,3]
  vmovdqa LCPI0_1(%rip), %xmm9 ## xmm9 = [4,5,6,7]
  vmovaps %ymm2, %ymm5
  vmovaps %ymm3, %ymm6
.p2align 4, 0x90
LBB0_22: ## %foreach_full_body.lr.ph.us ## =>This Loop Header: Depth=1 ## Child Loop BB0_23 Depth 2
  movl %r12d, %eax
  movl %r15d, %ebx
  movl %edi, %ebp
.p2align 4, 0x90
LBB0_23: ## %foreach_full_body.us ## Parent Loop BB0_22 Depth=1 ## => This Inner Loop Header: Depth=2
  cltq
  vmovups (%r8,%rax), %xmm4
  vinsertf128 $1, 16(%r8,%rax), %ymm4, %ymm4
  vsubps %ymm0, %ymm4, %ymm4
  vdivps %ymm4, %ymm1, %ymm4
  vcmpnleps %ymm4, %ymm3, %ymm11
  vcmpnltps %ymm2, %ymm4, %ymm12
  vminps %ymm4, %ymm6, %ymm13
  vblendvps %ymm12, %ymm11, %ymm7, %ymm11
  vblendvps %ymm11, %ymm13, %ymm6, %ymm6
  vmaxps %ymm4, %ymm5, %ymm4
  vblendvps %ymm11, %ymm4, %ymm5, %ymm5
  addl $8, %ebp
  addl $8, %ebx
  addl $32, %eax
  cmpl %r13d, %ebp
  jl LBB0_23
## BB#19: ## %partial_inner_all_outer.us ## in Loop: Header=BB0_22 Depth=1
  cmpl %esi, %ebp
  jge LBB0_21
## BB#20: ## %partial_inner_only.us ## in Loop: Header=BB0_22 Depth=1
  vmovd %ebp, %xmm4
  vpshufd $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0]
  vpaddd %xmm8, %xmm4, %xmm11
  vpaddd %xmm9, %xmm4, %xmm12
  vextractf128 $1, %ymm10, %xmm4
  vpcmpgtd %xmm12, %xmm4, %xmm4
  vpcmpgtd %xmm11, %xmm10, %xmm11
  vinsertf128 $1, %xmm4, %ymm11, %ymm4
  shll $2, %ebx
  movslq %ebx, %rax
  vmaskmovps (%r8,%rax), %ymm4, %ymm11
  vsubps %ymm0, %ymm11, %ymm11
  vdivps %ymm11, %ymm1, %ymm11
  vcmpnleps %ymm11, %ymm3, %ymm12
  vcmpnltps %ymm2, %ymm11, %ymm13
  vandps %ymm13, %ymm12, %ymm12
  vminps %ymm11, %ymm6, %ymm13
  vblendvps %ymm12, %ymm4, %ymm7, %ymm4
  vblendvps %ymm4, %ymm13, %ymm6, %ymm6
  vmaxps %ymm11, %ymm5, %ymm11
  vblendvps %ymm4, %ymm11, %ymm5, %ymm5
LBB0_21: ## %foreach_reset.us ## in Loop: Header=BB0_22 Depth=1
  incl %edx
  addl %r9d, %r15d
  addl %r14d, %r12d
  cmpl %ecx, %edx
  jne LBB0_22
  jmp LBB0_14
LBB0_1: ## %for_test156.preheader
  cmpl %ecx, %edx
  jge LBB0_2
## BB#4: ## %for_loop158.lr.ph
  movl %esi, %eax
  subl %edi, %eax
  movl %eax, %ebx
  sarl $31, %ebx
  shrl $29, %ebx
  addl %eax, %ebx
  andl $-8, %ebx
  subl %ebx, %eax
  movl %esi, %r13d
  subl %eax, %r13d
  vmovd %esi, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vinsertf128 $1, %xmm5, %ymm5, %ymm11
  vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0]
  vinsertf128 $1, %xmm1, %ymm1, %ymm1
  vpermilps $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  cmpl %edi, %r13d
  jle LBB0_6
## BB#5: ## %foreach_full_body167.lr.ph.us.preheader
  movl %r9d, %r15d
  imull %edx, %r15d
  addl %edi, %r15d
  leal (,%r15,4), %r12d
  leal (,%r9,4), %r14d
  vxorps %ymm8, %ymm8, %ymm8
  vmovdqa LCPI0_0(%rip), %xmm9 ## xmm9 = [0,1,2,3]
  vmovdqa LCPI0_1(%rip), %xmm10 ## xmm10 = [4,5,6,7]
  vmovaps %ymm2, %ymm6
  vmovaps %ymm3, %ymm7
.p2align 4, 0x90
LBB0_10: ## %foreach_full_body167.lr.ph.us ## =>This Loop Header: Depth=1 ## Child Loop BB0_11 Depth 2
  movl %r12d, %eax
  movl %r15d, %ebx
  movl %edi, %ebp
.p2align 4, 0x90
LBB0_11: ## %foreach_full_body167.us ## Parent Loop BB0_10 Depth=1 ## => This Inner Loop Header: Depth=2
  cltq
  vmovups (%r8,%rax), %xmm5
  vinsertf128 $1, 16(%r8,%rax), %ymm5, %ymm5
  vsubps %ymm0, %ymm5, %ymm5
  vdivps %ymm5, %ymm1, %ymm5
  vcmpnleps %ymm5, %ymm3, %ymm12
  vcmpnltps %ymm2, %ymm5, %ymm13
  vminps %ymm5, %ymm7, %ymm14
  vblendvps %ymm13, %ymm12, %ymm8, %ymm12
  vblendvps %ymm12, %ymm14, %ymm7, %ymm7
  vmaxps %ymm5, %ymm6, %ymm5
  vblendvps %ymm12, %ymm5, %ymm6, %ymm6
  addl $8, %ebp
  addl $8, %ebx
  addl $32, %eax
  cmpl %r13d, %ebp
  jl LBB0_11
## BB#7: ## %partial_inner_all_outer206.us ## in Loop: Header=BB0_10 Depth=1
  cmpl %esi, %ebp
  jge LBB0_9
## BB#8: ## %partial_inner_only268.us ## in Loop: Header=BB0_10 Depth=1
  vmovd %ebp, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vpaddd %xmm9, %xmm5, %xmm12
  vpaddd %xmm10, %xmm5, %xmm13
  vextractf128 $1, %ymm11, %xmm5
  vpcmpgtd %xmm13, %xmm5, %xmm5
  vpcmpgtd %xmm12, %xmm11, %xmm12
  vinsertf128 $1, %xmm5, %ymm12, %ymm5
  shll $2, %ebx
  movslq %ebx, %rax
  vmaskmovps (%r8,%rax), %ymm5, %ymm12
  vsubps %ymm0, %ymm12, %ymm12
  vdivps %ymm12, %ymm1, %ymm12
  vcmpnleps %ymm12, %ymm3, %ymm13
  vcmpnltps %ymm2, %ymm12, %ymm14
  vandps %ymm14, %ymm13, %ymm13
  vminps %ymm12, %ymm7, %ymm14
  vblendvps %ymm13, %ymm5, %ymm8, %ymm5
  vblendvps %ymm5, %ymm14, %ymm7, %ymm7
  vmaxps %ymm12, %ymm6, %ymm12
  vblendvps %ymm5, %ymm12, %ymm6, %ymm6
LBB0_9: ## %foreach_reset175.us ## in Loop: Header=BB0_10 Depth=1
  incl %edx
  addl %r9d, %r15d
  addl %r14d, %r12d
  cmpl %ecx, %edx
  jne LBB0_10
  jmp LBB0_3
LBB0_13:
  vmovaps %ymm2, %ymm5
  vmovaps %ymm3, %ymm6
  jmp LBB0_14
LBB0_2:
  vmovaps %ymm2, %ymm6
  vmovaps %ymm3, %ymm7
  jmp LBB0_3
LBB0_18: ## %for_loop.lr.ph.for_loop.lr.ph.split_crit_edge
  movl %r9d, %eax
  imull %edx, %eax
  addl %edi, %eax
  shll $2, %eax
  shll $2, %r9d
  subl %edx, %ecx
  vmovd %edi, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vpaddd LCPI0_0(%rip), %xmm5, %xmm8
  vpaddd LCPI0_1(%rip), %xmm5, %xmm5
  vextractf128 $1, %ymm10, %xmm6
  vpcmpgtd %xmm5, %xmm6, %xmm7
  vxorps %ymm9, %ymm9, %ymm9
  vmovaps %ymm2, %ymm5
  vmovaps %ymm3, %ymm6
.p2align 4, 0x90
LBB0_25: ## %partial_inner_all_outer ## =>This Inner Loop Header: Depth=1
  cmpl %edi, %esi
  jle LBB0_24
## BB#26: ## %partial_inner_only ## in Loop: Header=BB0_25 Depth=1
  vpcmpgtd %xmm8, %xmm10, %xmm4
  vinsertf128 $1, %xmm7, %ymm4, %ymm4
  movslq %eax, %rdx
  vmaskmovps (%r8,%rdx), %ymm4, %ymm11
  vsubps %ymm0, %ymm11, %ymm11
  vdivps %ymm11, %ymm1, %ymm11
  vcmpnleps %ymm11, %ymm3, %ymm12
  vcmpnltps %ymm2, %ymm11, %ymm13
  vandps %ymm13, %ymm12, %ymm12
  vminps %ymm11, %ymm6, %ymm13
  vblendvps %ymm12, %ymm4, %ymm9, %ymm4
  vblendvps %ymm4, %ymm13, %ymm6, %ymm6
  vmaxps %ymm11, %ymm5, %ymm11
  vblendvps %ymm4, %ymm11, %ymm5, %ymm5
LBB0_24: ## %foreach_reset ## in Loop: Header=BB0_25 Depth=1
  addl %r9d, %eax
  decl %ecx
  jne LBB0_25
LBB0_14: ## %for_exit
  vextractf128 $1, %ymm6, %xmm0
  vminps %ymm6, %ymm0, %ymm0
  vpermilpd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0]
  vminps %ymm0, %ymm1, %ymm0
  vmovshdup %xmm0, %xmm1 ## xmm1 = xmm0[1,1,3,3]
  vminss %xmm1, %xmm0, %xmm0
  vmovss %xmm0, (%r11)
  vextractf128 $1, %ymm5, %xmm0
  vmaxps %ymm5, %ymm0, %ymm0
  jmp LBB0_15
LBB0_6: ## %for_loop158.lr.ph.for_loop158.lr.ph.split_crit_edge
  movl %r9d, %eax
  imull %edx, %eax
  addl %edi, %eax
  shll $2, %eax
  shll $2, %r9d
  subl %edx, %ecx
  vmovd %edi, %xmm6
  vpshufd $0, %xmm6, %xmm6 ## xmm6 = xmm6[0,0,0,0]
  vpaddd LCPI0_0(%rip), %xmm6, %xmm8
  vpaddd LCPI0_1(%rip), %xmm6, %xmm6
  vextractf128 $1, %ymm11, %xmm7
  vpcmpgtd %xmm6, %xmm7, %xmm9
  vxorps %ymm10, %ymm10, %ymm10
  vmovaps %ymm2, %ymm6
  vmovaps %ymm3, %ymm7
.p2align 4, 0x90
LBB0_28: ## %partial_inner_all_outer206 ## =>This Inner Loop Header: Depth=1
  cmpl %edi, %esi
  jle LBB0_27
## BB#29: ## %partial_inner_only268 ## in Loop: Header=BB0_28 Depth=1
  vpcmpgtd %xmm8, %xmm11, %xmm5
  vinsertf128 $1, %xmm9, %ymm5, %ymm5
  movslq %eax, %rdx
  vmaskmovps (%r8,%rdx), %ymm5, %ymm12
  vsubps %ymm0, %ymm12, %ymm12
  vdivps %ymm12, %ymm1, %ymm12
  vcmpnleps %ymm12, %ymm3, %ymm13
  vcmpnltps %ymm2, %ymm12, %ymm14
  vandps %ymm14, %ymm13, %ymm13
  vminps %ymm12, %ymm7, %ymm14
  vblendvps %ymm13, %ymm5, %ymm10, %ymm5
  vblendvps %ymm5, %ymm14, %ymm7, %ymm7
  vmaxps %ymm12, %ymm6, %ymm12
  vblendvps %ymm5, %ymm12, %ymm6, %ymm6
LBB0_27: ## %foreach_reset175 ## in Loop: Header=BB0_28 Depth=1
  addl %r9d, %eax
  decl %ecx
  jne LBB0_28
LBB0_3: ## %for_exit159
  vextractf128 $1, %ymm4, %xmm0
  vxorps %xmm1, %xmm1, %xmm1
  vpcmpeqd %xmm1, %xmm0, %xmm0
  vpcmpeqd %xmm2, %xmm2, %xmm2
  vpxor %xmm2, %xmm0, %xmm0
  vpcmpeqd %xmm1, %xmm4, %xmm1
  vpxor %xmm2, %xmm1, %xmm1
  vinsertf128 $1, %xmm0, %ymm1, %ymm0
  vmovaps LCPI0_2(%rip), %ymm1 ## ymm1 = [inf,inf,inf,inf,inf,inf,inf,inf]
  vblendvps %ymm0, %ymm7, %ymm1, %ymm1
  vextractf128 $1, %ymm1, %xmm2
  vminps %ymm1, %ymm2, %ymm1
  vpermilpd $1, %xmm1, %xmm2 ## xmm2 = xmm1[1,0]
  vminps %ymm1, %ymm2, %ymm1
  vmovshdup %xmm1, %xmm2 ## xmm2 = xmm1[1,1,3,3]
  vminss %xmm2, %xmm1, %xmm1
  vmovss %xmm1, (%r11)
  vmovaps LCPI0_3(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf]
  vblendvps %ymm0, %ymm6, %ymm1, %ymm0
  vextractf128 $1, %ymm0, %xmm1
  vmaxps %ymm0, %ymm1, %ymm0
LBB0_15: ## %for_exit
  vpermilpd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0]
  vmaxps %ymm0, %ymm1, %ymm0
  vmovshdup %xmm0, %xmm1 ## xmm1 = xmm0[1,1,3,3]
  vmaxss %xmm0, %xmm1, %xmm0
  vmovss %xmm0, (%r10)
  popq %rbx
  popq %r12
  popq %r13
  popq %r14
  popq %r15
  popq %rbp
  vzeroupper
  retq
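## ---------------------------------------------------------------------------
## Note on the kernel above (hedged reconstruction; every C name below is
## invented for illustration, only the instruction stream itself is
## authoritative). The mangled ISPC symbol decodes as roughly
## ComputeZBounds(uniform ints, uniform float depth[], more uniform floats,
## and two by-reference uniform float results). The inner loop loads 8 depth
## samples, forms z = num / (d - bias) (vsubps + vdivps), rejects samples
## outside [zlo, zhi) with vcmpnltps / vcmpnleps, and folds survivors into
## running min/max vectors (vminps / vmaxps + vblendvps); LBB0_14 / LBB0_3
## then reduce the 8 lanes horizontally and store through %r11 and %r10.
## The second loop copy at LBB0_1 handles a partially-on execution mask,
## substituting +Inf / -Inf (LCPI0_2 / LCPI0_3) for inactive lanes before
## the reduction. A plausible scalar sketch in C:
##
##   float minZ = zhi, maxZ = zlo;        // the bounds seed the reductions
##   for (/* each depth sample d in the tile row */) {
##       float z = num / (d - bias);      // vsubps + vdivps
##       if (z >= zlo && z < zhi) {       // vcmpnltps && vcmpnleps
##           minZ = z < minZ ? z : minZ;  // vminps + vblendvps
##           maxZ = z > maxZ ? z : maxZ;  // vmaxps + vblendvps
##       }
##   }
##   *outMinZ = minZ; *outMaxZ = maxZ;    // after the horizontal min/max
## ---------------------------------------------------------------------------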
.section __TEXT,__literal4,4byte_literals
.p2align 2
LCPI1_0:
  .long 1056964608 ## float 0.5
LCPI1_1:
  .long 1077936128 ## float 3
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI1_2:
  .long 0 ## 0x0
  .long 1 ## 0x1
  .long 2 ## 0x2
  .long 3 ## 0x3
LCPI1_3:
  .long 4 ## 0x4
  .long 5 ## 0x5
  .long 6 ## 0x6
  .long 7 ## 0x7
LCPI1_4:
  .byte 0 ## 0x0
  .byte 1 ## 0x1
  .byte 4 ## 0x4
  .byte 5 ## 0x5
  .byte 8 ## 0x8
  .byte 9 ## 0x9
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 8 ## 0x8
  .byte 9 ## 0x9
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 14 ## 0xe
  .byte 15 ## 0xf
.section __TEXT,__text,regular,pure_instructions
.globl _IntersectLightsWithTileMinMax___uniuniuniuniunfunfuniuniunfunfuniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_
.p2align 4, 0x90
_IntersectLightsWithTileMinMax___uniuniuniuniunfunfuniuniunfunfuniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_: ## @IntersectLightsWithTileMinMax___uniuniuniuniunfunfuniuniunfunfuniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_
## BB#0: ## %allocas
  pushq %rbp
  pushq %r15
  pushq %r14
  pushq %r13
  pushq %r12
  pushq %rbx
  subq $296, %rsp ## imm = 0x128
  vmovaps %xmm1, 16(%rsp) ## 16-byte Spill
  vmovaps %xmm0, (%rsp) ## 16-byte Spill
  movq 392(%rsp), %r10
  movq 384(%rsp), %r15
  movq 376(%rsp), %r13
  movq 368(%rsp), %r11
  movq 360(%rsp), %r14
  movl 352(%rsp), %r12d
  vmovmskps %ymm4, %eax
  vxorps %xmm0, %xmm0, %xmm0
  vcvtsi2ssl %r8d, %xmm0, %xmm1
  vmovss LCPI1_0(%rip), %xmm9 ## xmm9 = mem[0],zero,zero,zero
  vmulss %xmm9, %xmm1, %xmm5
  vxorps %xmm0, %xmm0, %xmm0
  vcvtsi2ssl %r9d, %xmm0, %xmm1
  vmulss %xmm9, %xmm1, %xmm6
  vmulss %xmm2, %xmm5, %xmm4
  vxorps %xmm1, %xmm1, %xmm1
  vsubss %xmm4, %xmm1, %xmm7
  vmulss %xmm3, %xmm6, %xmm2
  vsubss %xmm2, %xmm1, %xmm8
  vxorps %xmm0, %xmm0, %xmm0
  vcvtsi2ssl %esi, %xmm0, %xmm3
  vsubss %xmm5, %xmm3, %xmm1
  negl %edi
  vxorps %xmm0, %xmm0, %xmm0
  vcvtsi2ssl %edi, %xmm0, %xmm3
  vaddss %xmm5, %xmm3, %xmm0
  vcvtsi2ssl %ecx, %xmm0, %xmm3
  vsubss %xmm6, %xmm3, %xmm5
  negl %edx
  vcvtsi2ssl %edx, %xmm0, %xmm3
  vaddss %xmm6, %xmm3, %xmm10
  vmulss %xmm7, %xmm7, %xmm6
  vmulss %xmm1, %xmm1, %xmm3
  vaddss %xmm3, %xmm6, %xmm3
  vrsqrtss %xmm3, %xmm0, %xmm6
  vmulss %xmm3, %xmm6, %xmm3
  vmulss %xmm3, %xmm6, %xmm3
  vmovss LCPI1_1(%rip), %xmm11 ## xmm11 = mem[0],zero,zero,zero
  vsubss %xmm3, %xmm11, %xmm3
  vmulss %xmm3, %xmm6, %xmm3
  vmulss %xmm9, %xmm3, %xmm3
  vmulss %xmm3, %xmm7, %xmm6
  vmovaps %xmm6, -112(%rsp) ## 16-byte Spill
  vmulss %xmm3, %xmm1, %xmm1
  vmovaps %xmm1, -128(%rsp) ## 16-byte Spill
  vmulss %xmm4, %xmm4, %xmm1
  vmulss %xmm0, %xmm0, %xmm3
  vaddss %xmm3, %xmm1, %xmm1
  vrsqrtss %xmm1, %xmm0, %xmm3
  vmulss %xmm1, %xmm3, %xmm1
  vmulss %xmm1, %xmm3, %xmm1
  vsubss %xmm1, %xmm11, %xmm1
  vmulss %xmm1, %xmm3, %xmm1
  vmulss %xmm9, %xmm1, %xmm1
  vmulss %xmm1, %xmm4, %xmm3
  vmovaps %xmm3, -96(%rsp) ## 16-byte Spill
  vmulss %xmm1, %xmm0, %xmm0
  vmovaps %xmm0, -80(%rsp) ## 16-byte Spill
  vmulss %xmm2, %xmm2, %xmm0
  vmulss %xmm5, %xmm5, %xmm1
  vaddss %xmm1, %xmm0, %xmm0
  vrsqrtss %xmm0, %xmm0, %xmm1
  vmulss %xmm0, %xmm1, %xmm0
  vmulss %xmm0, %xmm1, %xmm0
  vsubss %xmm0, %xmm11, %xmm0
  vmulss %xmm0, %xmm1, %xmm0
  vmulss %xmm9, %xmm0, %xmm0
  vmulss %xmm0, %xmm2, %xmm1
  vmovaps %xmm1, -48(%rsp) ## 16-byte Spill
  vmulss %xmm0, %xmm5, %xmm0
  vmovaps %xmm0, -64(%rsp) ## 16-byte Spill
  vmulss %xmm8, %xmm8, %xmm0
  vmulss %xmm10, %xmm10, %xmm1
  vaddss %xmm1, %xmm0, %xmm0
  vrsqrtss %xmm0, %xmm0, %xmm1
  vmulss %xmm0, %xmm1, %xmm0
  vmulss %xmm0, %xmm1, %xmm0
  vsubss %xmm0, %xmm11, %xmm0
  vmulss %xmm0, %xmm1, %xmm0
  vmulss %xmm9, %xmm0, %xmm0
  vmulss %xmm0, %xmm8, %xmm1
  vmovaps %xmm1, -16(%rsp) ## 16-byte Spill
  vmulss %xmm0, %xmm10, %xmm0
  vmovaps %xmm0, -32(%rsp) ## 16-byte Spill
  movl %r12d, %ecx
  sarl $31, %ecx
  shrl $29, %ecx
  addl %r12d, %ecx
  andl $-8, %ecx
  cmpl $255, %eax
  jne LBB1_67
## BB#1: ## %all_on
  xorl %eax, %eax
  movl $0, %edx
  testl %ecx, %ecx
  jle LBB1_8
## BB#2: ## %foreach_full_body.lr.ph
  vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm10
  vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm11
  vpermilps $0, -128(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 256(%rsp) ## 32-byte Spill
  vpermilps $0, -112(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 224(%rsp) ## 32-byte Spill
  vpermilps $0, -80(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 192(%rsp) ## 32-byte Spill
  vpermilps $0, -96(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 160(%rsp) ## 32-byte Spill
  vpermilps $0, -64(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 128(%rsp) ## 32-byte Spill
  vpermilps $0, -48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 96(%rsp) ## 32-byte Spill
  vpermilps $0, -32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 64(%rsp) ## 32-byte Spill
  vpermilps $0, -16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 32(%rsp) ## 32-byte Spill
  xorl %esi, %esi
  vxorps %ymm9, %ymm9, %ymm9
  vmovdqa LCPI1_4(%rip), %xmm3 ## xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
  xorl %eax, %eax
  xorl %edx, %edx
.p2align 4, 0x90
LBB1_3: ## %foreach_full_body ## =>This Inner Loop Header: Depth=1
  vmovd %edx, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vpaddd LCPI1_2(%rip), %xmm0, %xmm7
  movslq %esi, %rdi
  vmovups (%r13,%rdi), %xmm1
  vinsertf128 $1, 16(%r13,%rdi), %ymm1, %ymm6
  vmovups (%r15,%rdi), %xmm1
  vinsertf128 $1, 16(%r15,%rdi), %ymm1, %ymm1
  vsubps %ymm1, %ymm9, %ymm12
  vsubps %ymm10, %ymm6, %ymm1
  vcmpnltps %ymm12, %ymm1, %ymm1
  vsubps %ymm6, %ymm11, %ymm13
  vcmpnltps %ymm12, %ymm13, %ymm13
  vandps %ymm1, %ymm13, %ymm1
  vmovmskps %ymm1, %ebp
  testl %ebp, %ebp
  je LBB1_7
## BB#4: ## %cif_mask_all ## in Loop: Header=BB1_3 Depth=1
  vmovups (%r14,%rdi), %xmm13
  vinsertf128 $1, 16(%r14,%rdi), %ymm13, %ymm13
  vmovups (%r11,%rdi), %xmm14
  vinsertf128 $1, 16(%r11,%rdi), %ymm14, %ymm14
  vmulps 256(%rsp), %ymm6, %ymm15 ## 32-byte Folded Reload
  vmulps 224(%rsp), %ymm13, %ymm8 ## 32-byte Folded Reload
  vaddps %ymm8, %ymm15, %ymm8
  vcmpnltps %ymm12, %ymm8, %ymm8
  vextractf128 $1, %ymm8, %xmm4
  vpshufb %xmm3, %xmm4, %xmm4
  vpshufb %xmm3, %xmm8, %xmm5
  vpunpcklqdq %xmm4, %xmm5, %xmm4 ## xmm4 = xmm5[0],xmm4[0]
  vmulps 192(%rsp), %ymm6, %ymm5 ## 32-byte Folded Reload
  vmulps 160(%rsp), %ymm13, %ymm8 ## 32-byte Folded Reload
  vaddps %ymm8, %ymm5, %ymm5
  vcmpnltps %ymm12, %ymm5, %ymm5
  vextractf128 $1, %ymm5, %xmm2
  vpshufb %xmm3, %xmm2, %xmm2
  vpshufb %xmm3, %xmm5, %xmm5
  vpunpcklqdq %xmm2, %xmm5, %xmm2 ## xmm2 = xmm5[0],xmm2[0]
  vpand %xmm4, %xmm2, %xmm2
  vmulps 128(%rsp), %ymm6, %ymm4 ## 32-byte Folded Reload
  vmulps 96(%rsp), %ymm14, %ymm5 ## 32-byte Folded Reload
  vaddps %ymm5, %ymm4, %ymm4
  vcmpnltps %ymm12, %ymm4, %ymm4
  vextractf128 $1, %ymm4, %xmm5
  vpshufb %xmm3, %xmm5, %xmm5
  vpshufb %xmm3, %xmm4, %xmm4
  vpunpcklqdq %xmm5, %xmm4, %xmm4 ## xmm4 = xmm4[0],xmm5[0]
  vmulps 64(%rsp), %ymm6, %ymm5 ## 32-byte Folded Reload
  vmulps 32(%rsp), %ymm14, %ymm6 ## 32-byte Folded Reload
  vaddps %ymm6, %ymm5, %ymm5
  vcmpnltps %ymm12, %ymm5, %ymm5
  vextractf128 $1, %ymm5, %xmm6
  vpshufb %xmm3, %xmm6, %xmm6
  vpshufb %xmm3, %xmm5, %xmm5
  vpunpcklqdq %xmm6, %xmm5, %xmm5 ## xmm5 = xmm5[0],xmm6[0]
  vpand %xmm5, %xmm4, %xmm4
  vpand %xmm4, %xmm2, %xmm2
  vpmovzxwd %xmm2, %xmm4 ## xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
  vpslld $31, %xmm4, %xmm4
  vpunpckhwd %xmm0, %xmm2, %xmm2 ## xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
  vpslld $31, %xmm2, %xmm2
  vinsertf128 $1, %xmm2, %ymm4, %ymm2
  vblendvps %ymm2, %ymm1, %ymm9, %ymm1
  vmovmskps %ymm1, %ebp
  testl %ebp, %ebp
  je LBB1_7
## BB#5: ## %cif_mask_all ## in Loop: Header=BB1_3 Depth=1
  vpaddd LCPI1_3(%rip), %xmm0, %xmm0
  vinsertf128 $1, %xmm0, %ymm7, %ymm7
  cmpl $255, %ebp
  jne LBB1_14
## BB#6: ## %packed_store_active___un_3C_uni_3E_vyi.exit1881 ## in Loop: Header=BB1_3 Depth=1
  cltq
  vextractf128 $1, %ymm7, 16(%r10,%rax,4)
  vmovups %xmm7, (%r10,%rax,4)
  addl $8, %eax
  jmp LBB1_7
.p2align 4, 0x90
LBB1_14: ## %cif_test_mixed ## in Loop: Header=BB1_3 Depth=1
  movslq %eax, %rdi
  leaq (%r10,%rdi,4), %rbx
  xorl %edi, %edi
  testb $1, %bpl
  je LBB1_16
## BB#15: ## %store.i.i1942 ## in Loop: Header=BB1_3 Depth=1
  movl %edx, (%rbx)
  movl $1, %edi
LBB1_16: ## %loopend.i.i1947 ## in Loop: Header=BB1_3 Depth=1
  testb $2, %bpl
  je LBB1_18
## BB#17: ## %store.i.i1942.1 ## in Loop: Header=BB1_3 Depth=1
  vpextrd $1, %xmm7, (%rbx,%rdi,4)
  incl %edi
LBB1_18: ## %loopend.i.i1947.1 ## in Loop: Header=BB1_3 Depth=1
  testb $4, %bpl
  je LBB1_20
## BB#19: ## %store.i.i1942.2 ## in Loop: Header=BB1_3 Depth=1
  movslq %edi, %rdi
  vpextrd $2, %xmm7, (%rbx,%rdi,4)
  incl %edi
LBB1_20: ## %loopend.i.i1947.2 ## in Loop: Header=BB1_3 Depth=1
  testb $8, %bpl
  je LBB1_22
## BB#21: ## %store.i.i1942.3 ## in Loop: Header=BB1_3 Depth=1
  movslq %edi, %rdi
  vpextrd $3, %xmm7, (%rbx,%rdi,4)
  incl %edi
LBB1_22: ## %loopend.i.i1947.3 ## in Loop: Header=BB1_3 Depth=1
  testb $16, %bpl
  je LBB1_24
## BB#23: ## %store.i.i1942.4 ## in Loop: Header=BB1_3 Depth=1
  vextractf128 $1, %ymm7, %xmm0
  movslq %edi, %rdi
  vmovd %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_24: ## %loopend.i.i1947.4 ## in Loop: Header=BB1_3 Depth=1
  testb $32, %bpl
  je LBB1_26
## BB#25: ## %store.i.i1942.5 ## in Loop: Header=BB1_3 Depth=1
  vextractf128 $1, %ymm7, %xmm0
  movslq %edi, %rdi
  vpextrd $1, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_26: ## %loopend.i.i1947.5 ## in Loop: Header=BB1_3 Depth=1
  testb $64, %bpl
  je LBB1_28
## BB#27: ## %store.i.i1942.6 ## in Loop: Header=BB1_3 Depth=1
  vextractf128 $1, %ymm7, %xmm0
  movslq %edi, %rdi
  vpextrd $2, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_28: ## %loopend.i.i1947.6 ## in Loop: Header=BB1_3 Depth=1
  testb %bpl, %bpl
  jns LBB1_30
## BB#29: ## %store.i.i1942.7 ## in Loop: Header=BB1_3 Depth=1
  vextractf128 $1, %ymm7, %xmm0
  movslq %edi, %rdi
  vpextrd $3, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_30: ## %loopend.i.i1947.7 ## in Loop: Header=BB1_3 Depth=1
  addl %eax, %edi
  movl %edi, %eax
LBB1_7: ## %if_exit ## in Loop: Header=BB1_3 Depth=1
  addl $8, %edx
  addl $32, %esi
  cmpl %ecx, %edx
  jl LBB1_3
  jmp LBB1_8
LBB1_67: ## %some_on
  xorl %edx, %edx
  movl $0, %eax
  testl %ecx, %ecx
  jle LBB1_8
## BB#68: ## %foreach_full_body516.lr.ph
  vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm10
  vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm11
  vpermilps $0, -128(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 256(%rsp) ## 32-byte Spill
  vpermilps $0, -112(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 224(%rsp) ## 32-byte Spill
  vpermilps $0, -80(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 192(%rsp) ## 32-byte Spill
  vpermilps $0, -96(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 160(%rsp) ## 32-byte Spill
  vpermilps $0, -64(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 128(%rsp) ## 32-byte Spill
  vpermilps $0, -48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 96(%rsp) ## 32-byte Spill
  vpermilps $0, -32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 64(%rsp) ## 32-byte Spill
  vpermilps $0, -16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 32(%rsp) ## 32-byte Spill
  xorl %esi, %esi
  vxorps %ymm9, %ymm9, %ymm9
  vmovdqa LCPI1_4(%rip), %xmm3 ## xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
  xorl %edx, %edx
  xorl %eax, %eax
.p2align 4, 0x90
LBB1_69: ## %foreach_full_body516 ## =>This Inner Loop Header: Depth=1
  vmovd %edx, %xmm0
  vpshufd $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0]
  vpaddd LCPI1_2(%rip), %xmm1, %xmm7
  movslq %esi, %rdi
  vmovups (%r13,%rdi), %xmm0
  vinsertf128 $1, 16(%r13,%rdi), %ymm0, %ymm6
  vmovups (%r15,%rdi), %xmm0
  vinsertf128 $1, 16(%r15,%rdi), %ymm0, %ymm0
  vsubps %ymm0, %ymm9, %ymm12
  vsubps %ymm10, %ymm6, %ymm0
  vcmpnltps %ymm12, %ymm0, %ymm0
  vsubps %ymm6, %ymm11, %ymm13
  vcmpnltps %ymm12, %ymm13, %ymm13
  vandps %ymm0, %ymm13, %ymm0
  vmovmskps %ymm0, %ebp
  testl %ebp, %ebp
  je LBB1_73
## BB#70: ## %cif_mask_all656 ## in Loop: Header=BB1_69 Depth=1
  vmovups (%r14,%rdi), %xmm13
  vinsertf128 $1, 16(%r14,%rdi), %ymm13, %ymm13
  vmovups (%r11,%rdi), %xmm14
  vinsertf128 $1, 16(%r11,%rdi), %ymm14, %ymm14
  vmulps 256(%rsp), %ymm6, %ymm15 ## 32-byte Folded Reload
  vmulps 224(%rsp), %ymm13, %ymm8 ## 32-byte Folded Reload
  vaddps %ymm8, %ymm15, %ymm8
  vcmpnltps %ymm12, %ymm8, %ymm8
  vextractf128 $1, %ymm8, %xmm4
  vpshufb %xmm3, %xmm4, %xmm4
  vpshufb %xmm3, %xmm8, %xmm5
  vpunpcklqdq %xmm4, %xmm5, %xmm4 ## xmm4 = xmm5[0],xmm4[0]
  vmulps 192(%rsp), %ymm6, %ymm5 ## 32-byte Folded Reload
  vmulps 160(%rsp), %ymm13, %ymm8 ## 32-byte Folded Reload
  vaddps %ymm8, %ymm5, %ymm5
  vcmpnltps %ymm12, %ymm5, %ymm5
  vextractf128 $1, %ymm5, %xmm2
  vpshufb %xmm3, %xmm2, %xmm2
  vpshufb %xmm3, %xmm5, %xmm5
  vpunpcklqdq %xmm2, %xmm5, %xmm2 ## xmm2 = xmm5[0],xmm2[0]
  vpand %xmm4, %xmm2, %xmm2
  vmulps 128(%rsp), %ymm6, %ymm4 ## 32-byte Folded Reload
  vmulps 96(%rsp), %ymm14, %ymm5 ## 32-byte Folded Reload
  vaddps %ymm5, %ymm4, %ymm4
  vcmpnltps %ymm12, %ymm4, %ymm4
  vextractf128 $1, %ymm4, %xmm5
  vpshufb %xmm3, %xmm5, %xmm5
  vpshufb %xmm3, %xmm4, %xmm4
  vpunpcklqdq %xmm5, %xmm4, %xmm4 ## xmm4 = xmm4[0],xmm5[0]
  vmulps 64(%rsp), %ymm6, %ymm5 ## 32-byte Folded Reload
  vmulps 32(%rsp), %ymm14, %ymm6 ## 32-byte Folded Reload
  vaddps %ymm6, %ymm5, %ymm5
  vcmpnltps %ymm12, %ymm5, %ymm5
  vextractf128 $1, %ymm5, %xmm6
  vpshufb %xmm3, %xmm6, %xmm6
  vpshufb %xmm3, %xmm5, %xmm5
  vpunpcklqdq %xmm6, %xmm5, %xmm5 ## xmm5 = xmm5[0],xmm6[0]
  vpand %xmm5, %xmm4, %xmm4
  vpand %xmm4, %xmm2, %xmm2
  vpmovzxwd %xmm2, %xmm4 ## xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
  vpslld $31, %xmm4, %xmm4
  vpunpckhwd %xmm0, %xmm2, %xmm2 ## xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
  vpslld $31, %xmm2, %xmm2
  vinsertf128 $1, %xmm2, %ymm4, %ymm2
  vblendvps %ymm2, %ymm0, %ymm9, %ymm0
  vmovmskps %ymm0, %ebp
  testl %ebp, %ebp
  je LBB1_73
## BB#71: ## %cif_mask_all656 ## in Loop: Header=BB1_69 Depth=1
  vpaddd LCPI1_3(%rip), %xmm1, %xmm0
  vinsertf128 $1, %xmm0, %ymm7, %ymm1
  cmpl $255, %ebp
  jne LBB1_74
## BB#72: ## %packed_store_active___un_3C_uni_3E_vyi.exit1743 ## in Loop: Header=BB1_69 Depth=1
  cltq
  vextractf128 $1, %ymm1, 16(%r10,%rax,4)
  vmovups %xmm1, (%r10,%rax,4)
  addl $8, %eax
  jmp LBB1_73
.p2align 4, 0x90
LBB1_74: ## %cif_test_mixed687 ## in Loop: Header=BB1_69 Depth=1
  movslq %eax, %rdi
  leaq (%r10,%rdi,4), %rbx
  xorl %edi, %edi
  testb $1, %bpl
  je LBB1_76
## BB#75: ## %store.i.i1706 ## in Loop: Header=BB1_69 Depth=1
  movl %edx, (%rbx)
  movl $1, %edi
LBB1_76: ## %loopend.i.i1711 ## in Loop: Header=BB1_69 Depth=1
  testb $2, %bpl
  je LBB1_78
## BB#77: ## %store.i.i1706.1 ## in Loop: Header=BB1_69 Depth=1
  vpextrd $1, %xmm1, (%rbx,%rdi,4)
  incl %edi
LBB1_78: ## %loopend.i.i1711.1 ## in Loop: Header=BB1_69 Depth=1
  testb $4, %bpl
  je LBB1_80
## BB#79: ## %store.i.i1706.2 ## in Loop: Header=BB1_69 Depth=1
  movslq %edi, %rdi
  vpextrd $2, %xmm1, (%rbx,%rdi,4)
  incl %edi
LBB1_80: ## %loopend.i.i1711.2 ## in Loop: Header=BB1_69 Depth=1
  testb $8, %bpl
  je LBB1_82
## BB#81: ## %store.i.i1706.3 ## in Loop: Header=BB1_69 Depth=1
  movslq %edi, %rdi
  vpextrd $3, %xmm1, (%rbx,%rdi,4)
  incl %edi
LBB1_82: ## %loopend.i.i1711.3 ## in Loop: Header=BB1_69 Depth=1
  testb $16, %bpl
  je LBB1_84
## BB#83: ## %store.i.i1706.4 ## in Loop: Header=BB1_69 Depth=1
  vextractf128 $1, %ymm1, %xmm0
  movslq %edi, %rdi
  vmovd %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_84: ## %loopend.i.i1711.4 ## in Loop: Header=BB1_69 Depth=1
  testb $32, %bpl
  je LBB1_86
## BB#85: ## %store.i.i1706.5 ## in Loop: Header=BB1_69 Depth=1
  vextractf128 $1, %ymm1, %xmm0
  movslq %edi, %rdi
  vpextrd $1, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_86: ## %loopend.i.i1711.5 ## in Loop: Header=BB1_69 Depth=1
  testb $64, %bpl
  je LBB1_88
## BB#87: ## %store.i.i1706.6 ## in Loop: Header=BB1_69 Depth=1
  vextractf128 $1, %ymm1, %xmm0
  movslq %edi, %rdi
  vpextrd $2, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_88: ## %loopend.i.i1711.6 ## in Loop: Header=BB1_69 Depth=1
  testb %bpl, %bpl
  jns LBB1_90
## BB#89: ## %store.i.i1706.7 ## in Loop: Header=BB1_69 Depth=1
  vextractf128 $1, %ymm1, %xmm0
  movslq %edi, %rdi
  vpextrd $3, %xmm0, (%rbx,%rdi,4)
  incl %edi
LBB1_90: ## %loopend.i.i1711.7 ## in Loop: Header=BB1_69 Depth=1
  addl %eax, %edi
  movl %edi, %eax
LBB1_73: ## %if_exit599 ## in Loop: Header=BB1_69 Depth=1
  addl $8, %edx
  addl $32, %esi
  cmpl %ecx, %edx
  jl LBB1_69
LBB1_8: ## %partial_inner_all_outer
  cmpl %r12d, %edx
  jge LBB1_66
## BB#9: ## %partial_inner_only
  vmovd %edx, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vpaddd LCPI1_2(%rip), %xmm0, %xmm11
  vpaddd LCPI1_3(%rip), %xmm0, %xmm12
  vmovd %r12d, %xmm0
  vpshufd $0, %xmm0, %xmm13 ## xmm13 = xmm0[0,0,0,0]
  vpcmpgtd %xmm12, %xmm13, %xmm0
  vpcmpgtd %xmm11, %xmm13, %xmm4
  vinsertf128 $1, %xmm0, %ymm4, %ymm4
  leal (,%rdx,4), %ecx
  movslq %ecx, %rcx
  vmaskmovps (%r13,%rcx), %ymm4, %ymm7
  vmaskmovps (%r15,%rcx), %ymm4, %ymm0
  vxorps %ymm8, %ymm8, %ymm8
  vsubps %ymm0, %ymm8, %ymm6
  vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vsubps %ymm0, %ymm7, %ymm0
  vcmpnltps %ymm6, %ymm0, %ymm0
  vpermilps $0, 16(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0]
  vinsertf128 $1, %xmm5, %ymm5, %ymm5
  vsubps %ymm7, %ymm5, %ymm5
  vcmpnltps %ymm6, %ymm5, %ymm5
  vandps %ymm0, %ymm5, %ymm0
  vblendvps %ymm4, %ymm0, %ymm8, %ymm5
  vmovmskps %ymm5, %esi
  testl %esi, %esi
  je LBB1_66
## BB#10: ## %if_then277
  vmaskmovps (%r14,%rcx), %ymm4, %ymm9
  vpermilps $0, -128(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0]
  vinsertf128 $1, %xmm5, %ymm5, %ymm5
  vmulps %ymm5, %ymm7, %ymm5
  vpermilps $0, -112(%rsp), %xmm10 ## 16-byte Folded Reload ## xmm10 = mem[0,0,0,0]
  vinsertf128 $1, %xmm10, %ymm10, %ymm10
  vmulps %ymm10, %ymm9, %ymm10
  vaddps %ymm10, %ymm5, %ymm5
  vcmpnltps %ymm6, %ymm5, %ymm10
  vextractf128 $1, %ymm10, %xmm2
  vmovdqa LCPI1_4(%rip), %xmm5 ## xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
  vpshufb %xmm5, %xmm2, %xmm2
  vpshufb %xmm5, %xmm10, %xmm3
  vpunpcklqdq %xmm2, %xmm3, %xmm2 ## xmm2 = xmm3[0],xmm2[0]
  vpermilps $0, -96(%rsp), %xmm3 ## 16-byte Folded Reload ## xmm3 = mem[0,0,0,0]
  vinsertf128 $1, %xmm3, %ymm3, %ymm3
  vmulps %ymm3, %ymm9, %ymm3
  vpermilps $0, -80(%rsp), %xmm9 ## 16-byte Folded Reload ## xmm9 = mem[0,0,0,0]
  vinsertf128 $1, %xmm9, %ymm9, %ymm9
  vmulps %ymm9, %ymm7, %ymm9
  vaddps %ymm3, %ymm9, %ymm3
  vcmpnltps %ymm6, %ymm3, %ymm3
  vextractf128 $1, %ymm3, %xmm1
  vpshufb %xmm5, %xmm1, %xmm1
  vpshufb %xmm5, %xmm3, %xmm3
  vpunpcklqdq %xmm1, %xmm3, %xmm1 ## xmm1 = xmm3[0],xmm1[0]
  vpand %xmm2, %xmm1, %xmm10
  vmaskmovps (%r11,%rcx), %ymm4, %ymm2
  vpermilps $0, -64(%rsp), %xmm3 ## 16-byte Folded Reload ## xmm3 = mem[0,0,0,0]
  vinsertf128 $1, %xmm3, %ymm3, %ymm3
  vmulps %ymm3, %ymm7, %ymm3
  vpermilps $0, -48(%rsp), %xmm9 ## 16-byte Folded Reload ## xmm9 = mem[0,0,0,0]
  vinsertf128 $1, %xmm9, %ymm9, %ymm9
  vmulps %ymm9, %ymm2, %ymm9
  vaddps %ymm9, %ymm3, %ymm3
  vcmpnltps %ymm6, %ymm3, %ymm3
  vextractf128 $1, %ymm3, %xmm1
  vpshufb %xmm5, %xmm1, %xmm1
  vpshufb %xmm5, %xmm3, %xmm3
  vpunpcklqdq %xmm1, %xmm3, %xmm1 ## xmm1 = xmm3[0],xmm1[0]
  vpermilps $0, -32(%rsp), %xmm3 ## 16-byte Folded Reload ## xmm3 = mem[0,0,0,0]
  vinsertf128 $1, %xmm3, %ymm3, %ymm3
  vmulps %ymm3, %ymm7, %ymm3
  vpermilps $0, -16(%rsp), %xmm7 ## 16-byte Folded Reload ## xmm7 = mem[0,0,0,0]
  vinsertf128 $1, %xmm7, %ymm7, %ymm7
  vmulps %ymm7, %ymm2, %ymm2
  vaddps %ymm2, %ymm3, %ymm2
  vcmpnltps %ymm6, %ymm2, %ymm2
  vextractf128 $1, %ymm2, %xmm3
  vpshufb %xmm5, %xmm3, %xmm3
  vpshufb %xmm5, %xmm2, %xmm2
  vpunpcklqdq %xmm3, %xmm2, %xmm2 ## xmm2 = xmm2[0],xmm3[0]
  vpand %xmm2, %xmm1, %xmm1
  vpand %xmm1, %xmm10, %xmm1
  vpmovzxwd %xmm1, %xmm2 ## xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
  vpslld $31, %xmm2, %xmm2
  vpunpckhwd %xmm0, %xmm1, %xmm1 ## xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
  vpslld $31, %xmm1, %xmm1
  vinsertf128 $1, %xmm1, %ymm2, %ymm1
  vblendvps %ymm1, %ymm0, %ymm8, %ymm5
  vinsertf128 $1, %xmm12, %ymm11, %ymm0
  vmovmskps %ymm4, %ecx
  cmpl $255, %ecx
  jne LBB1_31
## BB#11: ## %cif_mask_all336
  vmovmskps %ymm5, %esi
  testl %esi, %esi
  je LBB1_66
## BB#12: ## %cif_mask_all336
  movslq %eax, %rcx
  cmpl $255, %esi
  jne LBB1_48
## BB#13: ## %packed_store_active___un_3C_uni_3E_vyi.exit1857
  vextractf128 $1, %ymm0, 16(%r10,%rcx,4)
  vmovups %xmm0, (%r10,%rcx,4)
  addl $8, %eax
  jmp LBB1_66
LBB1_31: ## %cif_mask_mixed337
  vinsertf128 $1, %xmm13, %ymm13, %ymm2
  vextractf128 $1, %ymm0, %xmm1
  vextractf128 $1, %ymm2, %xmm3
  vpcmpgtd %xmm1, %xmm3, %xmm3
  vpcmpgtd %xmm0, %xmm2, %xmm2
  vinsertf128 $1, %xmm3, %ymm2, %ymm2
  vxorps %ymm3, %ymm3, %ymm3
  vblendvps %ymm2, %ymm5, %ymm3, %ymm2
  vmovmskps %ymm2, %esi
  testl %esi, %esi
  je LBB1_66
## BB#32: ## %loop.i.i1788.preheader
  movslq %eax, %rcx
  leaq (%r10,%rcx,4), %rdi
  xorl %ecx, %ecx
  testb $1, %sil
  je LBB1_34
## BB#33: ## %store.i.i1792
  movl %edx, (%rdi)
  movl $1, %ecx
LBB1_34: ## %loopend.i.i1797
  testb $2, %sil
  je LBB1_36
## BB#35: ## %store.i.i1792.1
  vpextrd $1, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_36: ## %loopend.i.i1797.1
  testb $4, %sil
  je LBB1_38
## BB#37: ## %store.i.i1792.2
  movslq %ecx, %rcx
  vpextrd $2, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_38: ## %loopend.i.i1797.2
  testb $8, %sil
  je LBB1_40
## BB#39: ## %store.i.i1792.3
  movslq %ecx, %rcx
  vpextrd $3, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_40: ## %loopend.i.i1797.3
  testb $16, %sil
  je LBB1_42
## BB#41: ## %store.i.i1792.4
  movslq %ecx, %rcx
  vmovd %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_42: ## %loopend.i.i1797.4
  testb $32, %sil
  je LBB1_44
## BB#43: ## %store.i.i1792.5
  movslq %ecx, %rcx
  vpextrd $1, %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_44: ## %loopend.i.i1797.5
  testb $64, %sil
  je LBB1_46
## BB#45: ## %store.i.i1792.6
  movslq %ecx, %rcx
  vpextrd $2, %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_46: ## %loopend.i.i1797.6
  testb %sil, %sil
  jns LBB1_65
## BB#47: ## %store.i.i1792.7
  movslq %ecx, %rcx
  vpextrd $3, %xmm1, (%rdi,%rcx,4)
  jmp LBB1_64
LBB1_48: ## %cif_test_mixed368
  leaq (%r10,%rcx,4), %rdi
  xorl %ecx, %ecx
  testb $1, %sil
  je LBB1_50
## BB#49: ## %store.i.i1820
  movl %edx, (%rdi)
  movl $1, %ecx
LBB1_50: ## %loopend.i.i1825
  testb $2, %sil
  je LBB1_52
## BB#51: ## %store.i.i1820.1
  vpextrd $1, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_52: ## %loopend.i.i1825.1
  testb $4, %sil
  je LBB1_54
## BB#53: ## %store.i.i1820.2
  movslq %ecx, %rcx
  vpextrd $2, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_54: ## %loopend.i.i1825.2
  testb $8, %sil
  je LBB1_56
## BB#55: ## %store.i.i1820.3
  movslq %ecx, %rcx
  vpextrd $3, %xmm0, (%rdi,%rcx,4)
  incl %ecx
LBB1_56: ## %loopend.i.i1825.3
  testb $16, %sil
  je LBB1_58
## BB#57: ## %store.i.i1820.4
  vextractf128 $1, %ymm0, %xmm1
  movslq %ecx, %rcx
  vmovd %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_58: ## %loopend.i.i1825.4
  testb $32, %sil
  je LBB1_60
## BB#59: ## %store.i.i1820.5
  vextractf128 $1, %ymm0, %xmm1
  movslq %ecx, %rcx
  vpextrd $1, %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_60: ## %loopend.i.i1825.5
  testb $64, %sil
  je LBB1_62
## BB#61: ## %store.i.i1820.6
  vextractf128 $1, %ymm0, %xmm1
  movslq %ecx, %rcx
  vpextrd $2, %xmm1, (%rdi,%rcx,4)
  incl %ecx
LBB1_62: ## %loopend.i.i1825.6
  testb %sil, %sil
  jns LBB1_65
## BB#63: ## %store.i.i1820.7
  vextractf128 $1, %ymm0, %xmm0
  movslq %ecx, %rcx
  vpextrd $3, %xmm0, (%rdi,%rcx,4)
LBB1_64: ## %packed_store_active___un_3C_uni_3E_vyi.exit1799
  incl %ecx
LBB1_65: ## %packed_store_active___un_3C_uni_3E_vyi.exit1799
  addl %eax, %ecx
  movl %ecx, %eax
LBB1_66: ## %foreach_reset
  addq $296, %rsp ## imm = 0x128
  popq %rbx
  popq %r12
  popq %r13
  popq %r14
  popq %r15
  popq %rbp
  vzeroupper
  retq
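## ---------------------------------------------------------------------------
## Notes on IntersectLightsWithTileMinMax above (hedged reconstruction; the
## C names are assumptions). The scalar prologue builds four tile frustum
## plane normals: each vrsqrtss is followed by one Newton-Raphson step,
## e * (3 - x*e*e) * 0.5, which refines the ~12-bit rsqrt estimate of
## 1/sqrt(x) (LCPI1_0 = 0.5 and LCPI1_1 = 3 are exactly the constants this
## step needs). The main loop then tests 8 lights at a time against the
## tile's z bounds and the four planes, and appends the indices of surviving
## lights with ISPC's packed_store_active (the vpextrd ladder under per-bit
## tests of the vmovmskps result). A sketch of that store idiom in C:
##
##   /* append lane indices whose mask bit is set; returns new count */
##   int packed_store_active(unsigned mask8, const int idx[8], int *out) {
##       int n = 0;
##       for (int lane = 0; lane < 8; ++lane)   /* testb $1/$2/$4/... %bpl */
##           if (mask8 & (1u << lane))
##               out[n++] = idx[lane];          /* vpextrd / vmovd stores  */
##       return n;
##   }
##
## The all-lanes-set fast path (cmpl $255, %ebp) instead stores the whole
## 8-wide index vector unmasked (vextractf128 + vmovups) and bumps the
## output count by 8.
## ---------------------------------------------------------------------------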
.section __TEXT,__literal4,4byte_literals
.p2align 2
LCPI2_0:
  .long 1073741824 ## float 2
LCPI2_1:
  .long 1056964608 ## float 0.5
LCPI2_2:
  .long 3212836864 ## float -1
LCPI2_54:
  .long 0 ## float 0
.section __TEXT,__literal16,16byte_literals
.p2align 4
LCPI2_3:
  .long 0 ## 0x0
  .long 1 ## 0x1
  .long 2 ## 0x2
  .long 3 ## 0x3
LCPI2_4:
  .long 4 ## 0x4
  .long 5 ## 0x5
  .long 6 ## 0x6
  .long 7 ## 0x7
LCPI2_10:
  .long 939524096 ## 0x38000000
  .long 939524096 ## 0x38000000
  .long 939524096 ## 0x38000000
  .long 939524096 ## 0x38000000
LCPI2_12:
  .long 947912704 ## 0x38800000
  .long 947912704 ## 0x38800000
  .long 947912704 ## 0x38800000
  .long 947912704 ## 0x38800000
LCPI2_15:
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
LCPI2_23:
  .long 4294967170 ## 0xffffff82
  .long 4294967170 ## 0xffffff82
  .long 4294967170 ## 0xffffff82
  .long 4294967170 ## 0xffffff82
LCPI2_46:
  .long 127 ## 0x7f
  .long 127 ## 0x7f
  .long 127 ## 0x7f
  .long 127 ## 0x7f
LCPI2_47:
  .long 1 ## 0x1
  .long 1 ## 0x1
  .long 1 ## 0x1
  .long 1 ## 0x1
LCPI2_51:
  .byte 0 ## 0x0
  .byte 1 ## 0x1
  .byte 4 ## 0x4
  .byte 5 ## 0x5
  .byte 8 ## 0x8
  .byte 9 ## 0x9
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 8 ## 0x8
  .byte 9 ## 0x9
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 12 ## 0xc
  .byte 13 ## 0xd
  .byte 14 ## 0xe
  .byte 15 ## 0xf
LCPI2_52:
  .byte 0 ## 0x0
  .byte 2 ## 0x2
  .byte 4 ## 0x4
  .byte 6 ## 0x6
  .byte 8 ## 0x8
  .byte 10 ## 0xa
  .byte 12 ## 0xc
  .byte 14 ## 0xe
  .space 1
  .space 1
  .space 1
  .space 1
  .space 1
  .space 1
  .space 1
  .space 1
LCPI2_53:
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
  .byte 255 ## 0xff
  .byte 0 ## 0x0
.section __TEXT,__const
.p2align 5
LCPI2_5:
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
  .long 1056964608 ## float 0.5
LCPI2_6:
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
  .long 3212836864 ## float -1
LCPI2_7:
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
  .long 1077936128 ## float 3
LCPI2_8:
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
  .long 32767 ## 0x7fff
LCPI2_9:
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
  .long 260046848 ## 0xf800000
LCPI2_11:
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
  .long 1879048192 ## 0x70000000
LCPI2_13:
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
  .long 3095396352 ## float -6.10351563E-5
LCPI2_14:
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
  .long 32768 ## 0x8000
LCPI2_16:
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
  .long 1082130432 ## float 4
LCPI2_17:
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
  .long 3221225472 ## float -2
LCPI2_18:
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
  .long 1090519040 ## float 8
LCPI2_19:
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
  .long 998277249 ## float 0.00392156886
LCPI2_20:
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
  .long 1073741824 ## float 2
LCPI2_21:
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
  .long 1040187392 ## float 0.125
LCPI2_22:
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
  .long 1065353216 ## float 1
LCPI2_24:
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
  .long 2155872255 ## 0x807fffff
LCPI2_25:
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
  .long 1071637134 ## float 1.7491014
LCPI2_26:
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
  .long 3223280375 ## float -2.48992705
LCPI2_27:
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
  .long 1073611155 ## float 1.98442304
LCPI2_28:
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
  .long 3206119809 ## float -0.599632323
LCPI2_29:
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
  .long 1051677837 ## float 0.342441946
LCPI2_30:
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
  .long 1043570863 ## float 0.175417647
LCPI2_31:
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
  .long 1048640391 ## float 0.251919001
LCPI2_32:
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
  .long 1051369756 ## float 0.333260417
LCPI2_33:
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
  .long 1056964624 ## float 0.500000954
LCPI2_34:
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
  .long 1060205080 ## float 0.693147182
LCPI2_35:
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
  .long 4286578688 ## float -Inf
LCPI2_36:
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
  .long 2143289344 ## float NaN
LCPI2_37:
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
  .long 1069066811 ## float 1.44269502
LCPI2_38:
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
  .long 1060205056 ## float 0.693145751
LCPI2_39:
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
  .long 901758606 ## float 1.42860677E-6
LCPI2_40:
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
  .long 965769269 ## float 2.75553815E-4
LCPI2_41:
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
  .long 984283061 ## float 0.00130437932
LCPI2_42:
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
  .long 1007241053 ## float 0.00837883074
LCPI2_43:
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
  .long 1026202953 ## float 0.0416539051
LCPI2_44:
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
  .long 1042983712 ## float 0.166668415
LCPI2_45:
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
  .long 1056964605 ## float 0.499999911
LCPI2_48:
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
  .long 2139095040 ## float +Inf
LCPI2_49:
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
  .long 1055439406 ## float 0.454545438
LCPI2_50:
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
  .long 1132396544 ## float 255
LCPI2_55:
  .space 32
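## ---------------------------------------------------------------------------
## Notes on the constant pool above and the ShadeTile kernel below (hedged).
## The 8-wide constants are consistent with polynomial range-reduction code
## for transcendentals: LCPI2_38/LCPI2_39 are a Cody-Waite split of ln(2),
## LCPI2_34 is ln(2) itself, LCPI2_37 is log2(e), and LCPI2_40..LCPI2_45
## look like 1/7!-through-1/2!-style coefficients for an exp polynomial,
## with LCPI2_25..LCPI2_33 a matching log-style polynomial and LCPI2_35/
## LCPI2_36/LCPI2_48 (-Inf, NaN, +Inf) its special-case results. Together
## with LCPI2_49 = 0.454545438 (~= 1/2.2) and LCPI2_50 = 255.0, a plausible
## reading of the shader tail is an 8-bit gamma encode (names invented):
##
##   /* pow realized as exp(y * log(x)) in the vector code */
##   out = (unsigned char)(255.0f * powf(color, 1.0f / 2.2f));
##
## The kernel entry below dispatches on the execution mask (cmpl $255): an
## all-on path, and a some-on path that re-checks the mask before every
## per-lane byte store (the %pl_dolane ladders).
## ---------------------------------------------------------------------------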
.section __TEXT,__text,regular,pure_instructions
.globl _ShadeTile___uniuniuniuniuniuniREFs_5B_unInputDataArrays_5D_unfunfunfunfun_3C_uni_3E_uniunbun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_
.p2align 4, 0x90
_ShadeTile___uniuniuniuniuniuniREFs_5B_unInputDataArrays_5D_unfunfunfunfun_3C_uni_3E_uniunbun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_: ## @ShadeTile___uniuniuniuniuniuniREFs_5B_unInputDataArrays_5D_unfunfunfunfun_3C_uni_3E_uniunbun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_
## BB#0: ## %allocas
  pushq %rbp
  pushq %r15
  pushq %r14
  pushq %r13
  pushq %r12
  pushq %rbx
  subq $2696, %rsp ## imm = 0xA88
  movl %r8d, -112(%rsp) ## 4-byte Spill
  movl %ecx, -104(%rsp) ## 4-byte Spill
  movl %esi, -108(%rsp) ## 4-byte Spill
  movq 2800(%rsp), %rbx
  movq 2792(%rsp), %r10
  movq 2784(%rsp), %r14
  movq 2752(%rsp), %r15
  vmovmskps %ymm4, %ecx
  cmpl $0, 2768(%rsp)
  sete %al
  orb 2776(%rsp), %al
  cmpl $255, %ecx
  movl %edi, -20(%rsp) ## 4-byte Spill
  jne LBB2_289
## BB#1: ## %all_on
  testb $1, %al
  je LBB2_59
## BB#2: ## %if_then
  movl 2768(%rsp), %eax
  shll $2, %eax
  movl %eax, %r12d
  cmpl $255, %eax
  jl LBB2_4
## BB#3: ## %if_then
  movb $-1, %r12b
LBB2_4: ## %if_then
  cmpl -104(%rsp), %edx ## 4-byte Folded Reload
  jge LBB2_359
## BB#5: ## %for_loop.lr.ph
  movl -108(%rsp), %esi ## 4-byte Reload
  movl %esi, %ecx
  subl %edi, %ecx
  movl %ecx, %eax
  sarl $31, %eax
  shrl $29, %eax
  addl %ecx, %eax
  andl $-8, %eax
  subl %eax, %ecx
  movl %esi, %eax
  subl %ecx, %eax
  vmovd %esi, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  movzbl %r12b, %ecx
  vmovd %ecx, %xmm1
  vpshuflw $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0,4,5,6,7]
  vpshufd $80, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,1,1]
  movl -112(%rsp), %ecx ## 4-byte Reload
  imull %edx, %ecx
  addl %edi, %ecx
  vpshufb LCPI2_52(%rip), %xmm1, %xmm1 ## xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
  vmovdqa LCPI2_3(%rip), %xmm2 ## xmm2 = [0,1,2,3]
  vmovdqa LCPI2_4(%rip), %xmm3 ## xmm3 = [4,5,6,7]
  vextractf128 $1, %ymm0, %xmm4
.p2align 4, 0x90
LBB2_6: ## %for_loop ## =>This Loop Header: Depth=1 ## Child Loop BB2_8 Depth 2
  movl %edi, %ebp
  cmpl %edi, %eax
  jle LBB2_9
## BB#7: ## %foreach_full_body.lr.ph ## in Loop: Header=BB2_6 Depth=1
  movl %ecx, %esi
  movl %edi, %ebp
.p2align 4, 0x90
LBB2_8: ## %foreach_full_body ## Parent Loop BB2_6 Depth=1 ## => This Inner Loop Header: Depth=2
  movslq %esi, %rsi
  vmovq %xmm1, (%r14,%rsi)
  vmovq %xmm1, (%r10,%rsi)
  vmovq %xmm1, (%rbx,%rsi)
  addl $8, %ebp
  addl $8, %esi
  cmpl %eax, %ebp
  jl LBB2_8
LBB2_9: ## %partial_inner_all_outer ## in Loop: Header=BB2_6 Depth=1
  cmpl -108(%rsp), %ebp ## 4-byte Folded Reload
  jge LBB2_58
## BB#10: ## %partial_inner_only ## in Loop: Header=BB2_6 Depth=1
  vmovd %ebp, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vpaddd %xmm2, %xmm5, %xmm6
  vpaddd %xmm3, %xmm5, %xmm5
  vpcmpgtd %xmm5, %xmm4, %xmm5
  vpcmpgtd %xmm6, %xmm0, %xmm6
  vinsertf128 $1, %xmm5, %ymm6, %ymm5
  movl %edx, %edi
  movl %edx, %esi
  imull -112(%rsp), %esi ## 4-byte Folded Reload
  addl %ebp, %esi
  movslq %esi, %rbp
  vmovmskps %ymm5, %esi
  movq %rsi, %r8
  andq $1, %r8
  movl %r12d, %ebx
  je LBB2_12
## BB#11: ## %pl_dolane.i ## in Loop: Header=BB2_6 Depth=1
  movb %bl, (%r14,%rbp)
LBB2_12: ## %pl_loopend.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r9
  andq $2, %r9
  je LBB2_14
## BB#13: ## %pl_dolane.1.i ## in Loop: Header=BB2_6 Depth=1
  movb %bl, 1(%r14,%rbp)
LBB2_14: ## %pl_loopend.1.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r11
  andq $4, %r11
  je LBB2_16
## BB#15: ## %pl_dolane.2.i ## in Loop: Header=BB2_6 Depth=1
  movb %bl, 2(%r14,%rbp)
LBB2_16: ## %pl_loopend.2.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r14
  andq $8, %r14
  je LBB2_18
## BB#17: ## %pl_dolane.3.i ## in Loop: Header=BB2_6 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 3(%rdx,%rbp)
LBB2_18: ## %pl_loopend.3.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r15
  andq $16, %r15
  je LBB2_20
## BB#19: ## %pl_dolane.4.i ## in Loop: Header=BB2_6 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 4(%rdx,%rbp)
LBB2_20: ## %pl_loopend.4.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r13
  andq $32, %r13
  je LBB2_22
## BB#21: ## %pl_dolane.5.i ## in Loop: Header=BB2_6 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 5(%rdx,%rbp)
LBB2_22: ## %pl_loopend.5.i ## in Loop: Header=BB2_6 Depth=1
  movq %rsi, %r10
  andq $64, %r10
  je LBB2_24
## BB#23: ## %pl_dolane.6.i ## in Loop: Header=BB2_6 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 6(%rdx,%rbp)
LBB2_24: ## %pl_loopend.6.i ## in Loop: Header=BB2_6 Depth=1
  testb %sil, %sil
  jns LBB2_26
## BB#25: ## %pl_dolane.7.i ## in Loop: Header=BB2_6 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 7(%rdx,%rbp)
LBB2_26: ## %__masked_store_i8.exit ## in Loop: Header=BB2_6 Depth=1
  testq %r8, %r8
  je LBB2_28
## BB#27: ## %pl_dolane.i17291 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, (%rdx,%rbp)
LBB2_28: ## %pl_loopend.i17294 ## in Loop: Header=BB2_6 Depth=1
  testq %r9, %r9
  je LBB2_30
## BB#29: ## %pl_dolane.1.i17296 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 1(%rdx,%rbp)
LBB2_30: ## %pl_loopend.1.i17299 ## in Loop: Header=BB2_6 Depth=1
  testq %r11, %r11
  je LBB2_32
## BB#31: ## %pl_dolane.2.i17301 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 2(%rdx,%rbp)
LBB2_32: ## %pl_loopend.2.i17304 ## in Loop: Header=BB2_6 Depth=1
  testq %r14, %r14
  je LBB2_34
## BB#33: ## %pl_dolane.3.i17306 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 3(%rdx,%rbp)
LBB2_34: ## %pl_loopend.3.i17309 ## in Loop: Header=BB2_6 Depth=1
  testq %r15, %r15
  je LBB2_36
## BB#35: ## %pl_dolane.4.i17311 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 4(%rdx,%rbp)
LBB2_36: ## %pl_loopend.4.i17314 ## in Loop: Header=BB2_6 Depth=1
  testq %r13, %r13
  je LBB2_38
## BB#37: ## %pl_dolane.5.i17316 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 5(%rdx,%rbp)
LBB2_38: ## %pl_loopend.5.i17319 ## in Loop: Header=BB2_6 Depth=1
  testq %r10, %r10
  je LBB2_40
## BB#39: ## %pl_dolane.6.i17321 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 6(%rdx,%rbp)
LBB2_40: ## %pl_loopend.6.i17323 ## in Loop: Header=BB2_6 Depth=1
  testb %sil, %sil
  jns LBB2_42
## BB#41: ## %pl_dolane.7.i17325 ## in Loop: Header=BB2_6 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 7(%rdx,%rbp)
LBB2_42: ## %__masked_store_i8.exit17326 ## in Loop: Header=BB2_6 Depth=1
  testq %r8, %r8
  je LBB2_44
## BB#43: ## %pl_dolane.i17249 ## in Loop: Header=BB2_6 Depth=1
  movq 2800(%rsp), %rdx
  movb %bl, (%rdx,%rbp)
LBB2_44: ## %pl_loopend.i17252 ## in Loop: Header=BB2_6 Depth=1
  testq %r9, %r9
  movl %edi, %edx
  movq 2800(%rsp), %rbx
  je LBB2_46
## BB#45: ## %pl_dolane.1.i17254 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 1(%rbx,%rbp)
LBB2_46: ## %pl_loopend.1.i17257 ## in Loop: Header=BB2_6 Depth=1
  testq %r11, %r11
  je LBB2_48
## BB#47: ## %pl_dolane.2.i17259 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 2(%rbx,%rbp)
LBB2_48: ## %pl_loopend.2.i17262 ## in Loop: Header=BB2_6 Depth=1
  testq %r14, %r14
  movl -20(%rsp), %edi ## 4-byte Reload
  je LBB2_50
## BB#49: ## %pl_dolane.3.i17264 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 3(%rbx,%rbp)
LBB2_50: ## %pl_loopend.3.i17267 ## in Loop: Header=BB2_6 Depth=1
  testq %r15, %r15
  movq 2784(%rsp), %r14
  je LBB2_52
## BB#51: ## %pl_dolane.4.i17269 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 4(%rbx,%rbp)
LBB2_52: ## %pl_loopend.4.i17272 ## in Loop: Header=BB2_6 Depth=1
  testq %r13, %r13
  je LBB2_54
## BB#53: ## %pl_dolane.5.i17274 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 5(%rbx,%rbp)
LBB2_54: ## %pl_loopend.5.i17277 ## in Loop: Header=BB2_6 Depth=1
  testq %r10, %r10
  je LBB2_56
## BB#55: ## %pl_dolane.6.i17279 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 6(%rbx,%rbp)
LBB2_56: ## %pl_loopend.6.i17281 ## in Loop: Header=BB2_6 Depth=1
  testb %sil, %sil
  movq 2792(%rsp), %r10
  jns LBB2_58
## BB#57: ## %pl_dolane.7.i17283 ## in Loop: Header=BB2_6 Depth=1
  movb %r12b, 7(%rbx,%rbp)
.p2align 4, 0x90
LBB2_58: ## %foreach_reset ## in Loop: Header=BB2_6 Depth=1
  incl %edx
  addl -112(%rsp), %ecx ## 4-byte Folded Reload
  cmpl -104(%rsp), %edx ## 4-byte Folded Reload
  jne LBB2_6
  jmp LBB2_359
LBB2_289: ## %some_on
  testb $1, %al
  je LBB2_347
## BB#290: ## %if_then3271
  movl 2768(%rsp), %eax
  shll $2, %eax
  movl %eax, %r12d
  cmpl $255, %eax
  jl LBB2_292
## BB#291: ## %if_then3271
  movb $-1, %r12b
LBB2_292: ## %if_then3271
  cmpl -104(%rsp), %edx ## 4-byte Folded Reload
  jge LBB2_359
## BB#293: ## %for_loop3282.lr.ph
  movl -108(%rsp), %esi ## 4-byte Reload
  movl %esi, %ecx
  subl %edi, %ecx
  movl %ecx, %eax
  sarl $31, %eax
  shrl $29, %eax
  addl %ecx, %eax
  andl $-8, %eax
  subl %eax, %ecx
  movl %esi, %eax
  subl %ecx, %eax
  vmovd %esi, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  movzbl %r12b, %ecx
  vmovd %ecx, %xmm1
  vpshuflw $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0,4,5,6,7]
  vpshufd $80, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,1,1]
  movl -112(%rsp), %ecx ## 4-byte Reload
  imull %edx, %ecx
  addl %edi, %ecx
  vpshufb LCPI2_52(%rip), %xmm1, %xmm1 ## xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
  vmovdqa LCPI2_3(%rip), %xmm2 ## xmm2 = [0,1,2,3]
  vmovdqa LCPI2_4(%rip), %xmm3 ## xmm3 = [4,5,6,7]
  vextractf128 $1, %ymm0, %xmm4
.p2align 4, 0x90
LBB2_294: ## %for_loop3282 ## =>This Loop Header: Depth=1 ## Child Loop BB2_296 Depth 2
  movl %edi, %ebp
  cmpl %edi, %eax
  jle LBB2_297
## BB#295: ## %foreach_full_body3291.lr.ph ## in Loop: Header=BB2_294 Depth=1
  movl %ecx, %esi
  movl %edi, %ebp
.p2align 4, 0x90
LBB2_296: ## %foreach_full_body3291 ## Parent Loop BB2_294 Depth=1 ## => This Inner Loop Header: Depth=2
  movslq %esi, %rsi
  vmovq %xmm1, (%r14,%rsi)
  vmovq %xmm1, (%r10,%rsi)
  vmovq %xmm1, (%rbx,%rsi)
  addl $8, %ebp
  addl $8, %esi
  cmpl %eax, %ebp
  jl LBB2_296
LBB2_297: ## %partial_inner_all_outer3330 ## in Loop: Header=BB2_294 Depth=1
  cmpl -108(%rsp), %ebp ## 4-byte Folded Reload
  jge LBB2_346
## BB#298: ## %partial_inner_only3372 ## in Loop: Header=BB2_294 Depth=1
  vmovd %ebp, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vpaddd %xmm2, %xmm5, %xmm6
  vpaddd %xmm3, %xmm5, %xmm5
  vpcmpgtd %xmm5, %xmm4, %xmm5
  vpcmpgtd %xmm6, %xmm0, %xmm6
  vinsertf128 $1, %xmm5, %ymm6, %ymm5
  movl %edx, %edi
  movl %edx, %esi
  imull -112(%rsp), %esi ## 4-byte Folded Reload
  addl %ebp, %esi
  movslq %esi, %rbp
  vmovmskps %ymm5, %esi
  movq %rsi, %r8
  andq $1, %r8
  movl %r12d, %ebx
  je LBB2_300
## BB#299: ## %pl_dolane.i15499 ## in Loop: Header=BB2_294 Depth=1
  movb %bl, (%r14,%rbp)
LBB2_300: ## %pl_loopend.i15502 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r9
  andq $2, %r9
  je LBB2_302
## BB#301: ## %pl_dolane.1.i15504 ## in Loop: Header=BB2_294 Depth=1
  movb %bl, 1(%r14,%rbp)
LBB2_302: ## %pl_loopend.1.i15507 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r11
  andq $4, %r11
  je LBB2_304
## BB#303: ## %pl_dolane.2.i15509 ## in Loop: Header=BB2_294 Depth=1
  movb %bl, 2(%r14,%rbp)
LBB2_304: ## %pl_loopend.2.i15512 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r14
  andq $8, %r14
  je LBB2_306
## BB#305: ## %pl_dolane.3.i15514 ## in Loop: Header=BB2_294 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 3(%rdx,%rbp)
LBB2_306: ## %pl_loopend.3.i15517 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r15
  andq $16, %r15
  je LBB2_308
## BB#307: ## %pl_dolane.4.i15519 ## in Loop: Header=BB2_294 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 4(%rdx,%rbp)
LBB2_308: ## %pl_loopend.4.i15522 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r13
  andq $32, %r13
  je LBB2_310
## BB#309: ## %pl_dolane.5.i15524 ## in Loop: Header=BB2_294 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 5(%rdx,%rbp)
LBB2_310: ## %pl_loopend.5.i15527 ## in Loop: Header=BB2_294 Depth=1
  movq %rsi, %r10
  andq $64, %r10
  je LBB2_312
## BB#311: ## %pl_dolane.6.i15529 ## in Loop: Header=BB2_294 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 6(%rdx,%rbp)
LBB2_312: ## %pl_loopend.6.i15531 ## in Loop: Header=BB2_294 Depth=1
  testb %sil, %sil
  jns LBB2_314
## BB#313: ## %pl_dolane.7.i15533 ## in Loop: Header=BB2_294 Depth=1
  movq 2784(%rsp), %rdx
  movb %bl, 7(%rdx,%rbp)
LBB2_314: ## %__masked_store_i8.exit15534 ## in Loop: Header=BB2_294 Depth=1
  testq %r8, %r8
  je LBB2_316
## BB#315: ## %pl_dolane.i15457 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, (%rdx,%rbp)
LBB2_316: ## %pl_loopend.i15460 ## in Loop: Header=BB2_294 Depth=1
  testq %r9, %r9
  je LBB2_318
## BB#317: ## %pl_dolane.1.i15462 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 1(%rdx,%rbp)
LBB2_318: ## %pl_loopend.1.i15465 ## in Loop: Header=BB2_294 Depth=1
  testq %r11, %r11
  je LBB2_320
## BB#319: ## %pl_dolane.2.i15467 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 2(%rdx,%rbp)
LBB2_320: ## %pl_loopend.2.i15470 ## in Loop: Header=BB2_294 Depth=1
  testq %r14, %r14
  je LBB2_322
## BB#321: ## %pl_dolane.3.i15472 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 3(%rdx,%rbp)
LBB2_322: ## %pl_loopend.3.i15475 ## in Loop: Header=BB2_294 Depth=1
  testq %r15, %r15
  je LBB2_324
## BB#323: ## %pl_dolane.4.i15477 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 4(%rdx,%rbp)
LBB2_324: ## %pl_loopend.4.i15480 ## in Loop: Header=BB2_294 Depth=1
  testq %r13, %r13
  je LBB2_326
## BB#325: ## %pl_dolane.5.i15482 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 5(%rdx,%rbp)
LBB2_326: ## %pl_loopend.5.i15485 ## in Loop: Header=BB2_294 Depth=1
  testq %r10, %r10
  je LBB2_328
## BB#327: ## %pl_dolane.6.i15487 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 6(%rdx,%rbp)
LBB2_328: ## %pl_loopend.6.i15489 ## in Loop: Header=BB2_294 Depth=1
  testb %sil, %sil
  jns LBB2_330
## BB#329: ## %pl_dolane.7.i15491 ## in Loop: Header=BB2_294 Depth=1
  movq 2792(%rsp), %rdx
  movb %bl, 7(%rdx,%rbp)
LBB2_330: ## %__masked_store_i8.exit15492 ## in Loop: Header=BB2_294 Depth=1
  testq %r8, %r8
  je LBB2_332
## BB#331: ## %pl_dolane.i15415 ## in Loop: Header=BB2_294 Depth=1
  movq 2800(%rsp), %rdx
  movb %bl, (%rdx,%rbp)
LBB2_332: ## %pl_loopend.i15418 ## in Loop: Header=BB2_294 Depth=1
  testq %r9, %r9
  movl %edi, %edx
  movq 2800(%rsp), %rbx
  je LBB2_334
## BB#333: ## %pl_dolane.1.i15420 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 1(%rbx,%rbp)
LBB2_334: ## %pl_loopend.1.i15423 ## in Loop: Header=BB2_294 Depth=1
  testq %r11, %r11
  je LBB2_336
## BB#335: ## %pl_dolane.2.i15425 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 2(%rbx,%rbp)
LBB2_336: ## %pl_loopend.2.i15428 ## in Loop: Header=BB2_294 Depth=1
  testq %r14, %r14
  movl -20(%rsp), %edi ## 4-byte Reload
  je LBB2_338
## BB#337: ## %pl_dolane.3.i15430 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 3(%rbx,%rbp)
LBB2_338: ## %pl_loopend.3.i15433 ## in Loop: Header=BB2_294 Depth=1
  testq %r15, %r15
  movq 2784(%rsp), %r14
  je LBB2_340
## BB#339: ## %pl_dolane.4.i15435 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 4(%rbx,%rbp)
LBB2_340: ## %pl_loopend.4.i15438 ## in Loop: Header=BB2_294 Depth=1
  testq %r13, %r13
  je LBB2_342
## BB#341: ## %pl_dolane.5.i15440 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 5(%rbx,%rbp)
LBB2_342: ## %pl_loopend.5.i15443 ## in Loop: Header=BB2_294 Depth=1
  testq %r10, %r10
  je LBB2_344
## BB#343: ## %pl_dolane.6.i15445 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 6(%rbx,%rbp)
LBB2_344: ## %pl_loopend.6.i15447 ## in Loop: Header=BB2_294 Depth=1
  testb %sil, %sil
  movq 2792(%rsp), %r10
  jns LBB2_346
## BB#345: ## %pl_dolane.7.i15449 ## in Loop: Header=BB2_294 Depth=1
  movb %r12b, 7(%rbx,%rbp)
.p2align 4, 0x90
LBB2_346: ## %foreach_reset3299 ## in Loop: Header=BB2_294 Depth=1
  incl %edx
  addl -112(%rsp), %ecx ## 4-byte Folded Reload
  cmpl -104(%rsp), %edx ## 4-byte Folded Reload
  jne LBB2_294
  jmp LBB2_359
LBB2_59: ## %if_else
  cmpl -104(%rsp), %edx ## 4-byte Folded Reload
  jge LBB2_359
## BB#60: ## %for_loop121.lr.ph
  vcvtsi2ssl %r9d, %xmm0, %xmm4
  vmovss LCPI2_0(%rip), %xmm5 ## xmm5 = mem[0],zero,zero,zero
  vdivss %xmm4, %xmm5, %xmm4
  vmovss %xmm4, 2080(%rsp) ## 4-byte Spill
  vcvtsi2ssl -112(%rsp), %xmm0, %xmm4 ## 4-byte Folded Reload
  vdivss %xmm4, %xmm5, %xmm4
  movl -108(%rsp), %esi ## 4-byte Reload
  movl %esi, %eax
  subl %edi, %eax
  movl %eax, %ecx
  sarl $31, %ecx
  shrl $29, %ecx
  addl %eax, %ecx
  andl $-8, %ecx
  subl %ecx, %eax
  movl %esi, %r9d
  subl %eax, %r9d
  vmovd %esi, %xmm5
  vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
  vinsertf128 $1, %xmm5, %ymm5, %ymm5
  vmovups %ymm5, 2144(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0]
  vinsertf128 $1, %xmm4, %ymm4, %ymm4
  vmovups %ymm4, 1888(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm3, %xmm3 ## xmm3 = xmm3[0,0,0,0]
  vinsertf128 $1, %xmm3, %ymm3, %ymm3
  vmovups %ymm3, 1856(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0]
  vinsertf128 $1, %xmm2, %ymm2, %ymm2
  vmovups %ymm2, 1824(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 1792(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm1, %xmm0 ## xmm0 = xmm1[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 1376(%rsp) ## 32-byte Spill
  vpxor %xmm9, %xmm9, %xmm9
  ## implicit-def: %YMM3
  ## implicit-def: %XMM6
  ## implicit-def: %XMM0
  vmovaps %xmm0, -16(%rsp) ## 16-byte Spill
  ## implicit-def: %XMM0
  vmovaps %xmm0, 48(%rsp) ## 16-byte Spill
  ## implicit-def: %XMM0
  vmovaps %xmm0, 672(%rsp) ## 16-byte Spill
  ## implicit-def: %XMM10
  ## implicit-def: %XMM14
  ## implicit-def: %XMM0
  vmovaps %xmm0, 1440(%rsp) ## 16-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 480(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 448(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 544(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 896(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1696(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 2112(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1600(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1760(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1728(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1664(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1632(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1568(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1536(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1504(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1472(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1344(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1312(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 2016(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1984(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovups %ymm0, 1952(%rsp) ## 32-byte Spill
  ## implicit-def: %YMM0
  vmovdqu %ymm0, 1920(%rsp) ## 32-byte Spill
  movl %r9d, 2048(%rsp) ## 4-byte Spill
.p2align 4, 0x90
LBB2_61: ## %for_loop121 ## =>This Loop Header: Depth=1 ## Child Loop BB2_67 Depth 2 ## Child Loop BB2_71 Depth 3 ## Child Loop BB2_268 Depth 2
  vxorps %xmm0, %xmm0, %xmm0
  vcvtsi2ssl %edx, %xmm0, %xmm0
  vaddss LCPI2_1(%rip), %xmm0, %xmm0
  vmulss 2080(%rsp), %xmm0, %xmm0 ## 4-byte Folded Reload
  vaddss LCPI2_2(%rip), %xmm0, %xmm0
  vxorps %xmm1, %xmm1, %xmm1
  vsubss %xmm0, %xmm1, %xmm1
  cmpl %edi, %r9d
  movl %edx, -100(%rsp) ## 4-byte Spill
  vmovdqu %ymm3, 416(%rsp) ## 32-byte Spill
  vmovdqa %xmm6, 352(%rsp) ## 16-byte Spill
  vmovdqa %xmm10, 176(%rsp) ## 16-byte Spill
  vmovaps %xmm14, 800(%rsp) ## 16-byte Spill
  vmovaps %xmm1, 864(%rsp) ## 16-byte Spill
  jle LBB2_62
## BB#66: ## %foreach_full_body131.lr.ph ## in Loop: Header=BB2_61 Depth=1
  movl %edx, %r8d
  imull -112(%rsp), %r8d ## 4-byte Folded Reload
  vmovd %r8d, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 1408(%rsp) ## 32-byte Spill
  vpermilps $0, %xmm1, %xmm0 ## xmm0 = xmm1[0,0,0,0]
  vinsertf128 $1, %xmm0, %ymm0, %ymm0
  vmovups %ymm0, 608(%rsp) ## 32-byte Spill
  movl %edi, %r11d
.p2align 4, 0x90
LBB2_67: ## %foreach_full_body131 ## Parent Loop BB2_61 Depth=1 ## => This Loop Header: Depth=2 ## Child Loop BB2_71 Depth 3
  vmovd %r11d, %xmm0
  vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
  vpaddd
LCPI2_3(%rip), %xmm0, %xmm1 vpaddd LCPI2_4(%rip), %xmm0, %xmm2 vmovdqu 1408(%rsp), %ymm0 ## 32-byte Reload vmovdqu %ymm1, -96(%rsp) ## 32-byte Spill vpaddd %xmm0, %xmm1, %xmm10 vextractf128 $1, %ymm0, %xmm0 vmovdqa %xmm2, 128(%rsp) ## 16-byte Spill vpaddd %xmm0, %xmm2, %xmm1 vpslld $2, %xmm10, %xmm0 vpmovsxdq %xmm0, %xmm2 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm0 vmovq (%r15), %xmm3 ## xmm3 = mem[0],zero vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm0 vmovdqa %xmm3, 320(%rsp) ## 16-byte Spill vpaddq %xmm2, %xmm3, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovss (%rax), %xmm0 ## xmm0 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm0, %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] vinsertps $32, (%rdx), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] vinsertps $48, (%rsi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0] vmovups %ymm0, 512(%rsp) ## 32-byte Spill vmovdqa %xmm1, -64(%rsp) ## 16-byte Spill vpaddd %xmm1, %xmm1, %xmm0 vpaddd %xmm10, %xmm10, %xmm2 vpmovsxdq %xmm2, %xmm4 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm6 vpmovsxdq %xmm0, %xmm7 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm5 vmovq 8(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm2 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbx vmovq %xmm2, %rbp vpextrq $1, %xmm2, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm9, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm9[4],xmm0[5],xmm9[5],xmm0[6],xmm9[6],xmm0[7],xmm9[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm11 vmovaps LCPI2_8(%rip), %ymm13 ## ymm13 = [32767,32767,32767,32767,32767,32767,32767,32767] vandps %ymm13, %ymm11, %ymm8 vmovq 16(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm2 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm9, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm9[4],xmm0[5],xmm9[5],xmm0[6],xmm9[6],xmm0[7],xmm9[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vpxor %xmm9, %xmm9, %xmm9 vinsertf128 $1, %xmm1, %ymm0, %ymm15 vmovq 24(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm0 vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw 
$1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm9, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm9[4],xmm0[5],xmm9[5],xmm0[6],xmm9[6],xmm0[7],xmm9[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovq 32(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm3 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm9, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm9[4],xmm0[5],xmm9[5],xmm0[6],xmm9[6],xmm0[7],xmm9[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vpmovsxdq %xmm10, %xmm3 vpshufd $78, %xmm10, %xmm1 ## xmm1 = xmm10[2,3,0,1] vpmovsxdq %xmm1, %xmm1 vmovq 40(%r15), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm4 ## xmm4 = xmm2[0,1,0,1] vpaddq %xmm1, %xmm4, %xmm2 vmovdqa %xmm4, 96(%rsp) ## 16-byte Spill vpaddq %xmm3, %xmm4, %xmm4 vmovq %xmm4, %rax vpextrq $1, %xmm4, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vpinsrb $0, (%rax), %xmm0, %xmm2 vpinsrb $1, (%rcx), %xmm2, %xmm2 vmovq 48(%r15), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm4 vmovq %xmm4, %r14 vpextrq $1, %xmm4, %rcx vmovdqa %xmm5, 192(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm4 vmovq %xmm4, %rdi vpextrq $1, %xmm4, %rbx vmovq 56(%r15), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm3 vmovq %xmm3, %rbp vpextrq $1, %xmm3, %rax vpslld $13, %xmm8, %xmm3 vextractf128 $1, %ymm8, %xmm4 vpslld $13, %xmm4, %xmm14 vpinsrb $2, (%rdx), %xmm2, %xmm2 vpinsrb $3, (%rsi), %xmm2, %xmm6 vmovdqa %xmm5, 256(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm1 vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vinsertf128 $1, %xmm14, %ymm3, %ymm12 vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vpaddd %xmm5, %xmm3, %xmm1 vmovdqu %ymm1, 704(%rsp) ## 32-byte Spill vmovdqa LCPI2_12(%rip), %xmm7 ## xmm7 = [947912704,947912704,947912704,947912704] vpaddd %xmm7, %xmm3, %xmm1 vmovdqu %ymm1, 736(%rsp) ## 32-byte Spill vmovaps LCPI2_14(%rip), %ymm2 ## ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768] vmovaps %ymm2, %ymm13 vandps %ymm13, %ymm11, %ymm1 vmovaps LCPI2_8(%rip), %ymm4 ## ymm4 = [32767,32767,32767,32767,32767,32767,32767,32767] vandps %ymm4, %ymm15, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm10 vinsertf128 $1, %xmm10, %ymm3, %ymm9 vpaddd %xmm5, %xmm3, %xmm2 vmovdqu %ymm2, 1056(%rsp) ## 32-byte Spill vpaddd %xmm7, %xmm3, %xmm2 vmovdqu %ymm2, 1088(%rsp) ## 32-byte Spill vandps %ymm13, %ymm15, %ymm15 vandps %ymm4, %ymm0, %ymm3 vpslld $13, %xmm3, %xmm8 vextractf128 $1, %ymm3, %xmm3 vpslld $13, %xmm3, %xmm2 vmovdqa %xmm2, 1120(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm8, 
%ymm2 vpaddd %xmm5, %xmm8, %xmm3 vmovdqu %ymm3, 1152(%rsp) ## 32-byte Spill vpaddd %xmm7, %xmm8, %xmm3 vmovdqu %ymm3, 1184(%rsp) ## 32-byte Spill vandps %ymm13, %ymm0, %ymm4 vpmovzxbd %xmm6, %xmm0 ## xmm0 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovdqu %ymm0, 224(%rsp) ## 32-byte Spill vpinsrb $0, (%r14), %xmm0, %xmm0 vpinsrb $1, (%rcx), %xmm0, %xmm0 vpinsrb $2, (%rdi), %xmm0, %xmm0 vpinsrb $3, (%rbx), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 64(%rsp) ## 32-byte Spill vpinsrb $0, (%rbp), %xmm0, %xmm0 vpinsrb $1, (%rax), %xmm0, %xmm0 vpinsrb $2, (%rdx), %xmm0, %xmm0 vpinsrb $3, (%rsi), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 384(%rsp) ## 32-byte Spill movl 2768(%rsp), %edi testl %edi, %edi vmovaps LCPI2_9(%rip), %ymm0 ## ymm0 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848] vandps %ymm0, %ymm12, %ymm11 vpxor %xmm3, %xmm3, %xmm3 vpcmpeqd %xmm3, %xmm11, %xmm8 vmovups %ymm1, 576(%rsp) ## 32-byte Spill vpslld $16, %xmm1, %xmm1 vmovdqa LCPI2_15(%rip), %xmm13 ## xmm13 = [260046848,260046848,260046848,260046848] vpcmpeqd %xmm13, %xmm11, %xmm6 vmovdqu %ymm6, 1024(%rsp) ## 32-byte Spill vmovups %ymm9, 832(%rsp) ## 32-byte Spill vandps %ymm0, %ymm9, %ymm6 vpcmpeqd %xmm3, %xmm6, %xmm3 vmovdqu %ymm3, 768(%rsp) ## 32-byte Spill vpxor %xmm7, %xmm7, %xmm7 vmovups %ymm15, 640(%rsp) ## 32-byte Spill vpslld $16, %xmm15, %xmm3 vmovdqu %ymm3, 1280(%rsp) ## 32-byte Spill vpcmpeqd %xmm13, %xmm6, %xmm9 vmovaps LCPI2_8(%rip), %ymm3 ## ymm3 = [32767,32767,32767,32767,32767,32767,32767,32767] vandps (%rsp), %ymm3, %ymm15 ## 32-byte Folded Reload vpslld $13, %xmm15, %xmm3 vmovdqu %ymm3, 1216(%rsp) ## 32-byte Spill vmovups %ymm2, 1248(%rsp) ## 32-byte Spill vandps %ymm0, %ymm2, %ymm3 vpcmpeqd %xmm7, %xmm3, %xmm7 vmovups %ymm4, 288(%rsp) ## 32-byte Spill vpslld $16, %xmm4, %xmm0 vpcmpeqd %xmm13, %xmm3, %xmm2 jle LBB2_68 ## BB#70: ## %cif_mask_all.lr.ph ## in Loop: Header=BB2_67 Depth=2 vmovdqa %xmm5, %xmm4 vpaddd %xmm4, %xmm14, %xmm5 vmovdqu %ymm2, 960(%rsp) ## 32-byte Spill vmovdqa %xmm4, %xmm2 vmovdqu %ymm1, 992(%rsp) ## 32-byte Spill vmovups 704(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm1, %ymm1 vmovdqu %ymm7, 928(%rsp) ## 32-byte Spill vmovdqa LCPI2_12(%rip), %xmm7 ## xmm7 = [947912704,947912704,947912704,947912704] vpaddd %xmm7, %xmm14, %xmm5 vmovups 736(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm4, %ymm4 vextractf128 $1, %ymm11, %xmm5 vpxor %xmm11, %xmm11, %xmm11 vmovdqu %ymm0, 704(%rsp) ## 32-byte Spill vpcmpeqd %xmm11, %xmm5, %xmm0 vinsertf128 $1, %xmm0, %ymm8, %ymm0 vmovaps LCPI2_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05] vaddps %ymm14, %ymm4, %ymm4 vblendvps %ymm0, %ymm4, %ymm1, %ymm0 vmovdqa LCPI2_15(%rip), %xmm13 ## xmm13 = [260046848,260046848,260046848,260046848] vpcmpeqd %xmm13, %xmm5, %xmm1 vmovups 1024(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vmovaps LCPI2_11(%rip), %ymm11 ## ymm11 = [1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192] vorps %ymm11, %ymm12, %ymm4 vblendvps %ymm1, %ymm4, %ymm0, %ymm0 vmovups %ymm0, 736(%rsp) ## 32-byte Spill vpaddd %xmm2, %xmm10, %xmm0 vmovdqa %xmm2, %xmm8 vmovups 
1056(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpaddd %xmm7, %xmm10, %xmm1 vmovups 1088(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vextractf128 $1, %ymm6, %xmm4 vpxor %xmm6, %xmm6, %xmm6 vpcmpeqd %xmm6, %xmm4, %xmm5 vmovups 768(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm2, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm13, %xmm4, %xmm1 vinsertf128 $1, %xmm1, %ymm9, %ymm1 vorps 832(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm0, %ymm10 vmovdqa 1120(%rsp), %xmm2 ## 16-byte Reload vpaddd %xmm8, %xmm2, %xmm0 vmovups 1152(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpaddd %xmm7, %xmm2, %xmm1 vmovups 1184(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vextractf128 $1, %ymm3, %xmm2 vpcmpeqd %xmm6, %xmm2, %xmm3 vpxor %xmm6, %xmm6, %xmm6 vmovups 928(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm4, %ymm3 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm3, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm13, %xmm2, %xmm1 vmovups 960(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps 1248(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm0, %ymm12 vextractf128 $1, %ymm15, %xmm0 vpslld $13, %xmm0, %xmm0 vmovups 1216(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vextractf128 $1, %ymm0, %xmm1 vpaddd %xmm8, %xmm1, %xmm2 vpaddd %xmm8, %xmm0, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vpaddd %xmm7, %xmm1, %xmm1 vpaddd %xmm7, %xmm0, %xmm3 vinsertf128 $1, %xmm1, %ymm3, %ymm1 vandps LCPI2_9(%rip), %ymm0, %ymm3 vextractf128 $1, %ymm3, %xmm4 vpcmpeqd %xmm6, %xmm4, %xmm5 vpcmpeqd %xmm6, %xmm3, %xmm6 vinsertf128 $1, %xmm5, %ymm6, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm2, %ymm1 vpcmpeqd %xmm13, %xmm4, %xmm2 vpcmpeqd %xmm13, %xmm3, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm11, %ymm0, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm14 vmovups -96(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, 128(%rsp), %ymm0, %ymm1 ## 16-byte Folded Reload vmovdqa -64(%rsp), %xmm0 ## 16-byte Reload vpslld $2, %xmm0, %xmm2 vpmovsxdq %xmm2, %xmm3 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm2 vmovdqa 320(%rsp), %xmm4 ## 16-byte Reload vpaddq %xmm2, %xmm4, %xmm2 vpaddq %xmm3, %xmm4, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovss (%rax), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rdx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rsi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vmovups 512(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vcvtdq2ps %ymm1, %ymm1 vmovaps LCPI2_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vaddps %ymm5, %ymm1, %ymm1 vmulps 1888(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmovaps LCPI2_6(%rip), %ymm3 ## ymm3 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] vmovaps %ymm3, %ymm7 vaddps %ymm7, %ymm1, %ymm1 vsubps 1824(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmovups 1856(%rsp), %ymm3 ## 32-byte Reload vdivps %ymm2, %ymm3, %ymm13 vmulps %ymm13, %ymm1, %ymm1 vdivps 1792(%rsp), %ymm1, %ymm4 ## 32-byte Folded Reload vmulps 608(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload vdivps 1376(%rsp), %ymm1, %ymm11 ## 32-byte 
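## NOTE (annotation, inferred from the magic constants): the vpslld $13 /
## vpaddd sequences above decode IEEE half floats gathered 16 bits at a
## time with vpinsrw. The 32767/32768 masks split exponent+mantissa from
## sign, the 13-bit shift realigns them, 939524096 (112<<23) rebiases the
## exponent, 947912704 (113<<23) together with the -6.103516e-05 (-2^-14)
## add handles half denormals, and the 260046848 (0x0F800000) compares
## route Inf/NaN through the 1879048192 vorps path.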
Folded Reload vmulps %ymm4, %ymm4, %ymm1 vmulps %ymm11, %ymm11, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmulps %ymm13, %ymm13, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vrsqrtps %ymm1, %ymm2 vmulps %ymm1, %ymm2, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovaps LCPI2_7(%rip), %ymm9 ## ymm9 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm1, %ymm9, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovups 576(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 992(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps 736(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmovups 640(%rsp), %ymm3 ## 32-byte Reload vextractf128 $1, %ymm3, %xmm3 vpslld $16, %xmm3, %xmm3 vmovups 1280(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 vorps %ymm10, %ymm3, %ymm3 vmovaps %ymm4, %ymm8 vmulps %ymm2, %ymm2, %ymm4 vsubps %ymm4, %ymm2, %ymm4 vmulps %ymm3, %ymm3, %ymm6 vsubps %ymm6, %ymm3, %ymm6 vaddps %ymm6, %ymm4, %ymm4 vmovaps LCPI2_16(%rip), %ymm6 ## ymm6 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00] vmovaps %ymm6, %ymm10 vmulps %ymm10, %ymm4, %ymm6 vaddps %ymm7, %ymm6, %ymm6 vsqrtps %ymm6, %ymm6 vmulps %ymm10, %ymm2, %ymm2 vmovaps LCPI2_17(%rip), %ymm7 ## ymm7 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00] vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm6, %ymm2, %ymm2 vmovups %ymm2, -96(%rsp) ## 32-byte Spill vmulps %ymm10, %ymm3, %ymm2 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm2, %ymm6, %ymm2 vmovups %ymm2, 128(%rsp) ## 32-byte Spill vmovups 288(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 704(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm12, %ymm2, %ymm10 vpmovsxdq %xmm0, %xmm2 vmovdqa 96(%rsp), %xmm6 ## 16-byte Reload vpaddq %xmm2, %xmm6, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vpshufd $78, %xmm0, %xmm3 ## xmm3 = xmm0[2,3,0,1] vpmovsxdq %xmm3, %xmm3 vpaddq %xmm3, %xmm6, %xmm6 vmovq %xmm6, %rdx vpextrq $1, %xmm6, %rsi vpinsrb $0, (%rax), %xmm0, %xmm6 vpinsrb $1, (%rcx), %xmm6, %xmm6 vpinsrb $2, (%rdx), %xmm6, %xmm6 vpinsrb $3, (%rsi), %xmm6, %xmm6 vpmovzxbd %xmm6, %xmm6 ## xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovups 224(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm0, %ymm6 vmovdqa 192(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm2, %xmm0, %xmm7 vmovq %xmm7, %rax vpextrq $1, %xmm7, %rcx vpaddq %xmm3, %xmm0, %xmm7 vmovq %xmm7, %rdx vpextrq $1, %xmm7, %rsi vpinsrb $0, (%rax), %xmm0, %xmm7 vpinsrb $1, (%rcx), %xmm7, %xmm7 vpinsrb $2, (%rdx), %xmm7, %xmm7 vpinsrb $3, (%rsi), %xmm7, %xmm7 vpmovzxbd %xmm7, %xmm7 ## xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero vmovups 64(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm0, %ymm7 vmulps %ymm5, %ymm1, %ymm1 vmovdqa 256(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm3, %xmm0, %xmm3 vpaddq %xmm2, %xmm0, %xmm2 vmulps %ymm1, %ymm8, %ymm5 vmovups %ymm5, 192(%rsp) ## 32-byte Spill vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmulps %ymm1, %ymm11, %ymm2 vmovups %ymm2, 256(%rsp) ## 32-byte Spill vmulps %ymm1, %ymm13, %ymm1 vmovups %ymm1, 64(%rsp) ## 32-byte Spill vmulps LCPI2_18(%rip), %ymm4, %ymm1 vsubps %ymm1, %ymm9, %ymm9 vcvtdq2ps %ymm6, %ymm1 vmovaps LCPI2_19(%rip), %ymm2 ## ymm2 = 
[3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03] vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 224(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 288(%rsp) ## 32-byte Spill vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vpinsrb $0, (%rax), %xmm0, %xmm1 vpinsrb $1, (%rcx), %xmm1, %xmm1 vpinsrb $2, (%rdx), %xmm1, %xmm1 vpinsrb $3, (%rsi), %xmm1, %xmm1 vpmovzxbd %xmm1, %xmm1 ## xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero vmovups 384(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm0, %ymm1 vcvtdq2ps %ymm1, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 384(%rsp) ## 32-byte Spill vmovaps LCPI2_14(%rip), %ymm0 ## ymm0 = [32768,32768,32768,32768,32768,32768,32768,32768] vandps (%rsp), %ymm0, %ymm1 ## 32-byte Folded Reload vpslld $16, %xmm1, %xmm2 vextractf128 $1, %ymm1, %xmm1 vpslld $16, %xmm1, %xmm1 vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps %ymm14, %ymm1, %ymm0 movq 64(%r15), %r14 movq 72(%r15), %r12 movq 80(%r15), %r13 movq 120(%r15), %rdx vmovups %ymm10, 96(%rsp) ## 32-byte Spill vaddps LCPI2_20(%rip), %ymm10, %ymm1 vmulps LCPI2_21(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vmovups %ymm0, 576(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovaps %ymm11, %ymm0 movl %edi, %ecx movq 2760(%rsp), %rbx vxorps %ymm1, %ymm1, %ymm1 vmovups %ymm1, -64(%rsp) ## 32-byte Spill vxorps %ymm10, %ymm10, %ymm10 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm13, 640(%rsp) ## 32-byte Spill vmovups %ymm8, 320(%rsp) ## 32-byte Spill vmovups %ymm0, 512(%rsp) ## 32-byte Spill .p2align 4, 0x90 LBB2_71: ## %cif_mask_all ## Parent Loop BB2_61 Depth=1 ## Parent Loop BB2_67 Depth=2 ## => This Inner Loop Header: Depth=3 movslq (%rbx), %rax vmovss (%rdx,%rax,4), %xmm4 ## xmm4 = mem[0],zero,zero,zero vbroadcastss (%r14,%rax,4), %ymm1 vsubps %ymm8, %ymm1, %ymm6 vbroadcastss (%r12,%rax,4), %ymm1 vsubps %ymm0, %ymm1, %ymm1 vbroadcastss (%r13,%rax,4), %ymm2 vsubps %ymm13, %ymm2, %ymm3 vmulps %ymm6, %ymm6, %ymm2 vmulps %ymm1, %ymm1, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulss %xmm4, %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vcmpnleps %ymm2, %ymm7, %ymm7 vmovmskps %ymm7, %esi cmpl $255, %esi je LBB2_74 ## BB#72: ## %cif_mask_all ## in Loop: Header=BB2_71 Depth=3 testl %esi, %esi je LBB2_73 ## BB#79: ## %cif_test_mixed706 ## in Loop: Header=BB2_71 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm11 vmulps %ymm11, %ymm2, %ymm12 vmovaps LCPI2_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm12, %ymm5, %ymm12 vmulps %ymm12, %ymm11, %ymm11 vmulps %ymm11, %ymm6, %ymm12 vblendvps %ymm7, %ymm12, %ymm6, %ymm12 vmulps %ymm11, %ymm1, %ymm6 vblendvps %ymm7, %ymm6, %ymm1, %ymm1 vmulps %ymm11, %ymm3, %ymm6 vblendvps %ymm7, %ymm6, %ymm3, %ymm3 vmovups -96(%rsp), %ymm5 ## 32-byte Reload vmulps %ymm12, %ymm5, %ymm6 vmovaps %ymm9, %ymm14 vmovups 128(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm1, %ymm9, %ymm11 vaddps %ymm11, %ymm6, %ymm6 vmulps %ymm3, %ymm14, %ymm11 vaddps %ymm11, %ymm6, %ymm11 vcmpnleps %ymm15, %ymm11, %ymm6 vblendvps %ymm7, %ymm6, %ymm15, %ymm6 vmovmskps %ymm6, %esi testl %esi, %esi je LBB2_80 ## BB#81: ## %safe_if_run_true1007 ## in Loop: Header=BB2_71 Depth=3 movq 88(%r15), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 
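## NOTE (annotation, inferred): the hardware estimates above are refined
## with one Newton-Raphson step each. The vrcpps result y is improved as
## y' = y*(2 - x*y) (the vsubps from the 2.0 splat), and the vrsqrtps
## result as y' = 0.5*y*(3 - x*y*y) (the vsubps from the 3.0 splat, with
## the 0.5 folded into a later vmulps), lifting the ~12-bit estimates to
## near single precision without a full divide or sqrt.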
## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm4 vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vrsqrtps %ymm7, %ymm12 vmulps %ymm12, %ymm7, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps %ymm5, %ymm0 vmovaps LCPI2_7(%rip), %ymm5 ## ymm5 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI2_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vxorps %ymm13, %ymm13, %ymm13 vmovaps %ymm5, %ymm15 vmulps %ymm15, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm12 vblendvps %ymm6, %ymm12, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm12 vblendvps %ymm6, %ymm12, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm7 vblendvps %ymm6, %ymm7, %ymm3, %ymm3 vmulps %ymm2, %ymm0, %ymm2 vmulps %ymm1, %ymm9, %ymm1 vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm14, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmaxps %ymm13, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm13, %ymm1 vcmpnltps %ymm3, %ymm13, %ymm2 vmovups 640(%rsp), %ymm13 ## 32-byte Reload vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm5, %xmm5 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vmovups 1984(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm6, %ymm5, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm3, %ymm3 vorps %ymm15, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups 2016(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm6, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 2016(%rsp) ## 32-byte Spill vsubps %ymm0, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm5 vaddps LCPI2_26(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_27(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_28(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_29(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_30(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_31(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_32(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_33(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps %ymm8, %ymm5, %ymm5 vsubps %ymm3, %ymm15, %ymm3 vmulps %ymm5, %ymm3, %ymm3 vmovups %ymm7, 1984(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm5 vmulps LCPI2_34(%rip), %ymm5, %ymm5 vaddps %ymm3, %ymm5, %ymm3 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm5 vsubps %ymm5, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps LCPI2_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps 
LCPI2_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm5 vpaddd %xmm0, %xmm5, %xmm7 vpcmpgtd %xmm0, %xmm5, %xmm5 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm5, %ymm3, %ymm3 vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm5 vpcmpgtd %xmm2, %xmm0, %xmm12 vinsertf128 $1, %xmm5, %ymm12, %ymm5 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1 vblendvps %ymm5, %ymm15, %ymm1, %ymm1 vminps %ymm8, %ymm4, %ymm2 vmulps %ymm2, %ymm11, %ymm2 movq 96(%r15), %rsi vmulps 576(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmovups 320(%rsp), %ymm8 ## 32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 104(%r15), %rdi movq 112(%r15), %rbp vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm2 vblendvps %ymm6, %ymm2, %ymm10, %ymm10 vbroadcastss (%rdi,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups (%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm1 vblendvps %ymm6, %ymm1, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovups 512(%rsp), %ymm0 ## 32-byte Reload vmovaps %ymm14, %ymm9 jmp LBB2_73 .p2align 4, 0x90 LBB2_74: ## %cif_mask_all370 ## in Loop: Header=BB2_71 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm7 vmulps %ymm7, %ymm2, %ymm11 vmovaps LCPI2_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm11, %ymm5, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm6, %ymm12 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -96(%rsp), %ymm12, %ymm6 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm1, %ymm7 ## 32-byte Folded Reload vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm9, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vcmpnleps %ymm15, %ymm6, %ymm11 vmovmskps %ymm11, %esi testl %esi, %esi je LBB2_73 ## BB#75: ## %cif_mask_all370 ## in Loop: Header=BB2_71 Depth=3 cmpl $255, %esi jne LBB2_78 ## BB#76: ## %cif_test_all379 ## in Loop: Header=BB2_71 Depth=3 movq 88(%r15), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm15 vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm11 vaddps %ymm11, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm11 vaddps %ymm7, %ymm11, %ymm7 vrsqrtps %ymm7, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI2_7(%rip), %ymm4 ## ymm4 = 
[3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm4, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI2_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm4, %ymm5 vmulps %ymm5, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -96(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm9, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vxorps %ymm12, %ymm12, %ymm12 vmaxps %ymm12, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm12, %ymm1 vcmpnltps %ymm3, %ymm12, %ymm2 vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm4, %xmm4 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vandps LCPI2_24(%rip), %ymm3, %ymm3 vorps %ymm5, %ymm3, %ymm3 vsubps %ymm3, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm7 vaddps LCPI2_26(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_27(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_28(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_29(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_30(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_31(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_32(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_33(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps %ymm8, %ymm7, %ymm7 vsubps %ymm3, %ymm12, %ymm3 vmulps %ymm7, %ymm3, %ymm3 vcvtdq2ps %ymm4, %ymm4 vmulps LCPI2_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps LCPI2_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm0, %xmm4, %xmm7 vpcmpgtd %xmm0, %xmm4, %xmm4 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm4 vpcmpgtd %xmm2, %xmm0, %xmm11 vinsertf128 $1, %xmm4, %ymm11, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm12, %ymm1, %ymm1 vminps %ymm8, %ymm15, %ymm2 vxorps %ymm15, %ymm15, %ymm15 vmulps %ymm2, %ymm6, %ymm2 movq 96(%r15), %rsi vmulps 576(%rsp), 
%ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 movq 104(%r15), %rdi movq 112(%r15), %rbp vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm10 vbroadcastss (%rdi,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups (%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm0 jmp LBB2_77 LBB2_80: ## in Loop: Header=BB2_71 Depth=3 vmovaps %ymm14, %ymm9 jmp LBB2_73 LBB2_78: ## %cif_test_mixed ## in Loop: Header=BB2_71 Depth=3 movq 88(%r15), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm15 vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm12 vaddps %ymm7, %ymm12, %ymm7 vrsqrtps %ymm7, %ymm12 vmulps %ymm12, %ymm7, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI2_7(%rip), %ymm4 ## ymm4 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm4, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI2_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm4, %ymm5 vmulps %ymm5, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm12 vblendvps %ymm11, %ymm12, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm12 vblendvps %ymm11, %ymm12, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm7 vblendvps %ymm11, %ymm7, %ymm3, %ymm3 vmulps -96(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm9, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vxorps %ymm13, %ymm13, %ymm13 vmaxps %ymm13, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm13, %ymm1 vcmpnltps %ymm3, %ymm13, %ymm2 vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm4, %xmm4 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vmovups 1920(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm11, %ymm4, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm3, %ymm3 vorps %ymm5, %ymm3, %ymm3 vmovups 1952(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm11, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 1952(%rsp) ## 32-byte Spill vsubps %ymm0, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm4 vaddps LCPI2_26(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_27(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_28(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_29(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_30(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_31(%rip), %ymm4, %ymm4 vmulps %ymm4, 
%ymm3, %ymm4 vaddps LCPI2_32(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_33(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm8, %ymm4, %ymm4 vsubps %ymm3, %ymm13, %ymm3 vmulps %ymm4, %ymm3, %ymm3 vmovups %ymm7, 1920(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm4 vmulps LCPI2_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps LCPI2_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm0, %xmm4, %xmm7 vpcmpgtd %xmm0, %xmm4, %xmm4 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm4 vpcmpgtd %xmm2, %xmm0, %xmm12 vinsertf128 $1, %xmm4, %ymm12, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm13, %ymm1, %ymm1 vmovups 640(%rsp), %ymm13 ## 32-byte Reload vminps %ymm8, %ymm15, %ymm2 vxorps %ymm15, %ymm15, %ymm15 vmulps %ymm2, %ymm6, %ymm2 movq 96(%r15), %rsi vmulps 576(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 movq 104(%r15), %rdi movq 112(%r15), %rbp vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm2 vblendvps %ymm11, %ymm2, %ymm10, %ymm10 vbroadcastss (%rdi,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm2 vblendvps %ymm11, %ymm2, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups (%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm1 vblendvps %ymm11, %ymm1, %ymm0, %ymm0 LBB2_77: ## %cif_done ## in Loop: Header=BB2_71 Depth=3 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovups 320(%rsp), %ymm8 ## 32-byte Reload vmovups 512(%rsp), %ymm0 ## 32-byte Reload LBB2_73: ## %cif_done ## in Loop: Header=BB2_71 Depth=3 addq $4, %rbx decl %ecx jne LBB2_71 jmp LBB2_69 .p2align 4, 0x90 LBB2_68: ## in Loop: Header=BB2_67 Depth=2 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vxorps %ymm10, %ymm10, %ymm10 vxorps %ymm15, %ymm15, %ymm15 LBB2_69: ## %for_exit293 ## in Loop: Header=BB2_67 Depth=2 vmaxps %ymm15, %ymm10, %ymm0 vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vminps 
%ymm8, %ymm0, %ymm2 vcmpnleps %ymm2, %ymm15, %ymm0 vcmpnltps %ymm2, %ymm15, %ymm1 vblendvps %ymm1, %ymm8, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm9 ## xmm9 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm9, %xmm4, %xmm4 vpaddd %xmm9, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovaps LCPI2_24(%rip), %ymm10 ## ymm10 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm10, %ymm2, %ymm2 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm7, %ymm2, %ymm2 vsubps %ymm2, %ymm8, %ymm2 vmovaps LCPI2_25(%rip), %ymm11 ## ymm11 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00] vmulps %ymm11, %ymm2, %ymm4 vmovaps LCPI2_26(%rip), %ymm12 ## ymm12 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00] vaddps %ymm12, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_27(%rip), %ymm13 ## ymm13 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00] vaddps %ymm13, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_28(%rip), %ymm7 ## ymm7 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01] vaddps %ymm7, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_29(%rip), %ymm10 ## ymm10 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01] vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_30(%rip), %ymm11 ## ymm11 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01] vaddps %ymm11, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_31(%rip), %ymm5 ## ymm5 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_32(%rip), %ymm5 ## ymm5 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI2_33(%rip), %ymm5 ## ymm5 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vaddps %ymm8, %ymm4, %ymm4 vsubps %ymm2, %ymm15, %ymm2 vmulps %ymm4, %ymm2, %ymm2 vcvtdq2ps %ymm3, %ymm3 vmovaps LCPI2_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm4, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI2_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vmovaps %ymm3, %ymm4 vmovaps LCPI2_36(%rip), %ymm3 ## ymm3 = [nan,nan,nan,nan,nan,nan,nan,nan] vblendvps %ymm0, %ymm3, %ymm4, %ymm0 vblendvps %ymm1, %ymm0, %ymm2, %ymm0 vmovaps LCPI2_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01] vmulps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_37(%rip), %ymm1 ## ymm1 = [1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00] vmulps %ymm1, %ymm0, %ymm1 vroundps $9, %ymm1, %ymm1 vcvttps2dq %ymm1, %ymm2 vmovaps LCPI2_38(%rip), %ymm3 ## ymm3 = 
[6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01] vmulps %ymm3, %ymm1, %ymm3 vsubps %ymm3, %ymm0, %ymm0 vmovaps LCPI2_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06] vmulps %ymm3, %ymm1, %ymm1 vsubps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04] vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_41(%rip), %ymm14 ## ymm14 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03] vaddps %ymm14, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_42(%rip), %ymm13 ## ymm13 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03] vaddps %ymm13, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_43(%rip), %ymm12 ## ymm12 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02] vaddps %ymm12, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_44(%rip), %ymm11 ## ymm11 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01] vaddps %ymm11, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_45(%rip), %ymm6 ## ymm6 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01] vaddps %ymm6, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vaddps %ymm8, %ymm0, %ymm0 vmovdqa LCPI2_46(%rip), %xmm7 ## xmm7 = [127,127,127,127] vpaddd %xmm7, %xmm2, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpaddd %xmm7, %xmm3, %xmm4 vpcmpgtd %xmm7, %xmm3, %xmm3 vpcmpgtd %xmm7, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vmovdqa LCPI2_47(%rip), %xmm10 ## xmm10 = [1,1,1,1] vpcmpgtd %xmm4, %xmm10, %xmm3 vpcmpgtd %xmm1, %xmm10, %xmm5 vinsertf128 $1, %xmm3, %ymm5, %ymm3 vpslld $23, %xmm1, %xmm1 vpslld $23, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm1, %ymm1 vmulps %ymm0, %ymm1, %ymm0 vmovaps LCPI2_48(%rip), %ymm1 ## ymm1 = [inf,inf,inf,inf,inf,inf,inf,inf] vblendvps %ymm2, %ymm1, %ymm0, %ymm0 vblendvps %ymm3, %ymm15, %ymm0, %ymm0 vmovups %ymm0, -96(%rsp) ## 32-byte Spill vmovups -64(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm15, %ymm1, %ymm1 vminps %ymm8, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm15, %ymm1 vcmpnltps %ymm3, %ymm15, %ymm2 vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm4 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vpaddd %xmm9, %xmm5, %xmm5 vpaddd %xmm9, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovaps LCPI2_24(%rip), %ymm0 ## ymm0 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm0, %ymm3, %ymm3 vorps LCPI2_5(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm5 vaddps LCPI2_26(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_27(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_28(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_29(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_30(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_31(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_32(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_33(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps %ymm8, %ymm5, %ymm5 vsubps %ymm3, %ymm15, %ymm3 vmulps %ymm5, %ymm3, %ymm3 vcvtdq2ps %ymm4, %ymm4 vmulps 
LCPI2_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm4, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps LCPI2_49(%rip), %ymm1, %ymm1 vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps %ymm14, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm13, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm12, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm11, %ymm2, %ymm2 vmovaps %ymm11, %ymm12 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm6, %ymm2, %ymm2 vmovaps %ymm6, %ymm11 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vaddps %ymm8, %ymm1, %ymm1 vpaddd %xmm7, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm7, %xmm4, %xmm5 vpcmpgtd %xmm7, %xmm4, %xmm4 vpcmpgtd %xmm7, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vpcmpgtd %xmm5, %xmm10, %xmm4 vpcmpgtd %xmm2, %xmm10, %xmm6 vinsertf128 $1, %xmm4, %ymm6, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm5, %xmm5 vinsertf128 $1, %xmm5, %ymm2, %ymm2 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm15, %ymm1, %ymm1 vmovups (%rsp), %ymm2 ## 32-byte Reload vmaxps %ymm15, %ymm2, %ymm2 vminps %ymm8, %ymm2, %ymm4 vcmpnleps %ymm4, %ymm15, %ymm2 vcmpnltps %ymm4, %ymm15, %ymm3 vblendvps %ymm3, %ymm8, %ymm4, %ymm4 vpsrad $23, %xmm4, %xmm5 vextractf128 $1, %ymm4, %xmm6 vpsrad $23, %xmm6, %xmm6 vpaddd %xmm9, %xmm6, %xmm6 vpaddd %xmm9, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vandps %ymm0, %ymm4, %ymm4 vorps LCPI2_5(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm8, %ymm4 vmulps LCPI2_25(%rip), %ymm4, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps %ymm8, %ymm6, %ymm6 vsubps %ymm4, %ymm15, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vcvtdq2ps %ymm5, %ymm5 vmulps LCPI2_34(%rip), %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm2, LCPI2_36(%rip), %ymm0, %ymm2 vblendvps %ymm3, %ymm2, %ymm4, %ymm2 vmulps LCPI2_49(%rip), %ymm2, %ymm2 vmulps LCPI2_37(%rip), %ymm2, %ymm3 vroundps $9, %ymm3, %ymm3 vcvttps2dq %ymm3, %ymm4 vmulps LCPI2_38(%rip), %ymm3, %ymm5 vsubps %ymm5, %ymm2, %ymm2 vmulps LCPI2_39(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm2, %ymm2 vmulps LCPI2_40(%rip), %ymm2, %ymm3 vaddps %ymm14, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm13, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_43(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm12, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm8, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm2 vpaddd %xmm7, %xmm4, %xmm3 vextractf128 $1, %ymm4, %xmm5 vpaddd %xmm7, %xmm5, %xmm6 vpcmpgtd %xmm7, %xmm5, %xmm5 vpcmpgtd %xmm7, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vpcmpgtd %xmm6, %xmm10, %xmm5 vpcmpgtd %xmm3, %xmm10, 
%xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vpslld $23, %xmm3, %xmm3 vpslld $23, %xmm6, %xmm6 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm3, %ymm2 vblendvps %ymm4, LCPI2_48(%rip), %ymm2, %ymm2 vblendvps %ymm5, %ymm15, %ymm2, %ymm2 vmovaps LCPI2_50(%rip), %ymm3 ## ymm3 = [2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02] vmovaps %ymm3, %ymm4 vmulps -96(%rsp), %ymm4, %ymm0 ## 32-byte Folded Reload vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm3 vmovdqa LCPI2_51(%rip), %xmm5 ## xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm5, %xmm3, %xmm3 vpshufb %xmm5, %xmm0, %xmm0 vpunpcklqdq %xmm3, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm3[0] leal (%r11,%r8), %eax cltq vmovdqa LCPI2_52(%rip), %xmm3 ## xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> vpshufb %xmm3, %xmm0, %xmm0 movq 2784(%rsp), %rcx vmovq %xmm0, (%rcx,%rax) vmulps %ymm4, %ymm1, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vpshufb %xmm5, %xmm1, %xmm1 vpshufb %xmm5, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] vpshufb %xmm3, %xmm0, %xmm0 vmovq %xmm0, (%r10,%rax) vmulps %ymm4, %ymm2, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vpshufb %xmm5, %xmm1, %xmm1 vpshufb %xmm5, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] vpshufb %xmm3, %xmm0, %xmm0 movq 2800(%rsp), %rcx vmovq %xmm0, (%rcx,%rax) addl $8, %r11d cmpl %r9d, %r11d vpxor %xmm9, %xmm9, %xmm9 jl LBB2_67 jmp LBB2_63 .p2align 4, 0x90 LBB2_62: ## in Loop: Header=BB2_61 Depth=1 movl %edi, %r11d LBB2_63: ## %partial_inner_all_outer170 ## in Loop: Header=BB2_61 Depth=1 cmpl -108(%rsp), %r11d ## 4-byte Folded Reload jge LBB2_64 ## BB#82: ## %partial_inner_only1646 ## in Loop: Header=BB2_61 Depth=1 vmovd %r11d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI2_3(%rip), %xmm0, %xmm8 vpaddd LCPI2_4(%rip), %xmm0, %xmm9 vmovups 2144(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm0 vpcmpgtd %xmm9, %xmm0, %xmm0 vpcmpgtd %xmm8, %xmm1, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm7 movl -100(%rsp), %eax ## 4-byte Reload movl %eax, %ecx imull -112(%rsp), %ecx ## 4-byte Folded Reload vmovd %ecx, %xmm0 vpshufd $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0] vpaddd %xmm1, %xmm9, %xmm0 vpaddd %xmm1, %xmm8, %xmm1 vpslld $2, %xmm0, %xmm5 vpslld $2, %xmm1, %xmm2 vpmovsxdq %xmm2, %xmm4 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm6 vpmovsxdq %xmm5, %xmm2 vmovq (%r15), %xmm3 ## xmm3 = mem[0],zero vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1] vpaddq %xmm2, %xmm3, %xmm2 vpaddq %xmm6, %xmm3, %xmm6 vpaddq %xmm4, %xmm3, %xmm4 vinsertf128 $1, %xmm6, %ymm4, %ymm4 vmovups %ymm7, 512(%rsp) ## 32-byte Spill vmovmskps %ymm7, %r9d movq %r9, %r8 andq $1, %r8 jne LBB2_84 ## BB#83: ## in Loop: Header=BB2_61 Depth=1 movl 2768(%rsp), %r12d vmovups 416(%rsp), %ymm7 ## 32-byte Reload jmp LBB2_85 .p2align 4, 0x90 LBB2_64: ## in Loop: Header=BB2_61 Depth=1 movl -100(%rsp), %edx ## 4-byte Reload movl -20(%rsp), %edi ## 4-byte Reload vmovdqu 416(%rsp), %ymm3 ## 32-byte Reload vmovdqa 352(%rsp), %xmm6 ## 16-byte Reload vmovaps 176(%rsp), %xmm10 ## 16-byte Reload vmovaps 800(%rsp), %xmm14 ## 16-byte Reload jmp LBB2_65 .p2align 4, 0x90 LBB2_84: ## %pl_dolane.i16484 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm4, %rax vmovss (%rax), %xmm6 ## xmm6 = mem[0],zero,zero,zero vmovups 416(%rsp), %ymm7 ## 32-byte Reload vblendps $1, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0],ymm7[1,2,3,4,5,6,7] movl 2768(%rsp), %r12d LBB2_85: ## 
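## NOTE (annotation, inferred): the long vmulps/vaddps ladders above are
## polynomial log/exp kernels; with the 4.545454e-01 (1/2.2) factor they
## evaluate pow(x, 1/2.2), i.e. gamma correction, before output. Each
## color vector is then scaled by the 255.0 splat, truncated with
## vcvttps2dq, narrowed from 8 x i32 to 8 bytes via vpshufb plus
## vpunpcklqdq, and stored to its plane with one vmovq. The pl_dolane
## blocks that follow emulate a masked 64-bit float gather
## (__gather64_float) lane by lane for the ragged row tail.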
%pl_loopend.i16487 ## in Loop: Header=BB2_61 Depth=1 vpshufd $78, %xmm5, %xmm5 ## xmm5 = xmm5[2,3,0,1] movq %r9, %rsi andq $2, %rsi je LBB2_87 ## BB#86: ## %pl_dolane.1.i16492 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm4, %rax vinsertps $16, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0],mem[0],xmm7[2,3] vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7] LBB2_87: ## %pl_loopend.1.i16495 ## in Loop: Header=BB2_61 Depth=1 vmovaps LCPI2_9(%rip), %ymm10 ## ymm10 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848] vmovaps LCPI2_14(%rip), %ymm13 ## ymm13 = [32768,32768,32768,32768,32768,32768,32768,32768] vmovdqa LCPI2_15(%rip), %xmm14 ## xmm14 = [260046848,260046848,260046848,260046848] vpmovsxdq %xmm5, %xmm5 movq %r9, %rdi movl %ecx, %r13d andq $4, %rdi je LBB2_89 ## BB#88: ## %pl_dolane.2.i16499 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm4, %xmm6 vmovq %xmm6, %rax vinsertps $32, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0,1],mem[0],xmm7[3] vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7] LBB2_89: ## %pl_loopend.2.i16502 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm5, %xmm3, %xmm3 movq %r9, %rbp andq $8, %rbp je LBB2_91 ## BB#90: ## %pl_dolane.3.i16506 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm4, %xmm4 vpextrq $1, %xmm4, %rax vinsertps $48, (%rax), %xmm7, %xmm4 ## xmm4 = xmm7[0,1,2],mem[0] vblendps $15, %ymm4, %ymm7, %ymm7 ## ymm7 = ymm4[0,1,2,3],ymm7[4,5,6,7] LBB2_91: ## %pl_loopend.3.i16509 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm3, %ymm2, %ymm2 movq %r9, %rbx andq $16, %rbx je LBB2_93 ## BB#92: ## %pl_dolane.4.i16513 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm2, %rax vextractf128 $1, %ymm7, %xmm3 vmovss (%rax), %xmm4 ## xmm4 = mem[0],zero,zero,zero vblendps $1, %xmm4, %xmm3, %xmm3 ## xmm3 = xmm4[0],xmm3[1,2,3] vinsertf128 $1, %xmm3, %ymm7, %ymm7 LBB2_93: ## %pl_loopend.4.i16516 ## in Loop: Header=BB2_61 Depth=1 movq %r9, %rdx andq $32, %rdx je LBB2_95 ## BB#94: ## %pl_dolane.5.i16520 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm2, %rax vextractf128 $1, %ymm7, %xmm3 vinsertps $16, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertf128 $1, %xmm3, %ymm7, %ymm7 LBB2_95: ## %pl_loopend.5.i16523 ## in Loop: Header=BB2_61 Depth=1 movq %r9, %rcx andq $64, %rcx vmovdqu %ymm8, -64(%rsp) ## 32-byte Spill vmovdqa %xmm9, -96(%rsp) ## 16-byte Spill je LBB2_97 ## BB#96: ## %pl_dolane.6.i16527 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm3 vmovq %xmm3, %rax vextractf128 $1, %ymm7, %xmm3 vinsertps $32, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] vinsertf128 $1, %xmm3, %ymm7, %ymm7 LBB2_97: ## %pl_loopend.6.i16529 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm0, %ymm1, %ymm9 testb %r9b, %r9b jns LBB2_99 ## BB#98: ## %pl_dolane.7.i16533 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm0 vpextrq $1, %xmm0, %rax vextractf128 $1, %ymm7, %xmm0 vinsertps $48, (%rax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0] vinsertf128 $1, %xmm0, %ymm7, %ymm7 LBB2_99: ## %__gather64_float.exit16534 ## in Loop: Header=BB2_61 Depth=1 vmovups %ymm7, 416(%rsp) ## 32-byte Spill vmovaps 864(%rsp), %xmm0 ## 16-byte Reload vpermilps $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0] vpaddd %xmm9, %xmm9, %xmm3 vextractf128 $1, %ymm9, %xmm12 vpaddd %xmm12, %xmm12, %xmm4 vpmovsxdq %xmm4, %xmm0 vpmovsxdq %xmm3, %xmm2 vpshufd $78, %xmm3, %xmm3 ## xmm3 = xmm3[2,3,0,1] vpmovsxdq %xmm3, %xmm3 vmovq 8(%r15), %xmm5 ## xmm5 = mem[0],zero vpshufd $68, %xmm5, %xmm6 ## xmm6 = 
xmm5[0,1,0,1] vpaddq %xmm0, %xmm6, %xmm5 vpaddq %xmm3, %xmm6, %xmm7 vpaddq %xmm2, %xmm6, %xmm8 vinsertf128 $1, %xmm7, %ymm8, %ymm7 testq %r8, %r8 jne LBB2_101 ## BB#100: ## in Loop: Header=BB2_61 Depth=1 vmovdqa 352(%rsp), %xmm8 ## 16-byte Reload jmp LBB2_102 .p2align 4, 0x90 LBB2_101: ## %pl_dolane.i16409 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm7, %rax vmovdqa 352(%rsp), %xmm8 ## 16-byte Reload vpinsrw $0, (%rax), %xmm8, %xmm8 LBB2_102: ## %pl_loopend.i16412 ## in Loop: Header=BB2_61 Depth=1 vpshufd $78, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,0,1] testq %rsi, %rsi je LBB2_104 ## BB#103: ## %pl_dolane.1.i16418 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm7, %rax vpinsrw $1, (%rax), %xmm8, %xmm8 LBB2_104: ## %pl_loopend.1.i16421 ## in Loop: Header=BB2_61 Depth=1 vpmovsxdq %xmm4, %xmm4 testq %rdi, %rdi vmovups %ymm1, (%rsp) ## 32-byte Spill je LBB2_106 ## BB#105: ## %pl_dolane.2.i16426 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm7, %xmm1 vmovq %xmm1, %rax vpinsrw $2, (%rax), %xmm8, %xmm8 LBB2_106: ## %pl_loopend.2.i16429 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm4, %xmm6, %xmm6 testq %rbp, %rbp je LBB2_108 ## BB#107: ## %pl_dolane.3.i16434 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm7, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $3, (%rax), %xmm8, %xmm8 LBB2_108: ## %pl_loopend.3.i16437 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm6, %ymm5, %ymm5 testq %rbx, %rbx je LBB2_110 ## BB#109: ## %pl_dolane.4.i16442 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm5, %rax vpinsrw $4, (%rax), %xmm8, %xmm8 LBB2_110: ## %pl_loopend.4.i16445 ## in Loop: Header=BB2_61 Depth=1 testq %rdx, %rdx je LBB2_112 ## BB#111: ## %pl_dolane.5.i16450 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm5, %rax vpinsrw $5, (%rax), %xmm8, %xmm8 LBB2_112: ## %pl_loopend.5.i16453 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_114 ## BB#113: ## %pl_dolane.6.i16458 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm8, %xmm8 LBB2_114: ## %pl_loopend.6.i16460 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm4, %ymm0, %ymm0 vinsertf128 $1, %xmm3, %ymm2, %ymm4 testb %r9b, %r9b jns LBB2_116 ## BB#115: ## %pl_dolane.7.i16465 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm8, %xmm8 LBB2_116: ## %__gather64_i16.exit16466 ## in Loop: Header=BB2_61 Depth=1 vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vpxor %xmm6, %xmm6, %xmm6 vpunpckhwd %xmm6, %xmm8, %xmm1 ## xmm1 = xmm8[4],xmm6[4],xmm8[5],xmm6[5],xmm8[6],xmm6[6],xmm8[7],xmm6[7] vpmovzxwd %xmm8, %xmm2 ## xmm2 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps LCPI2_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 288(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm15 vandps %ymm10, %ymm15, %ymm2 vpaddd %xmm5, %xmm3, %xmm5 vmovdqu %ymm5, 224(%rsp) ## 32-byte Spill vpaddd LCPI2_12(%rip), %xmm3, %xmm3 vmovdqu %ymm3, 832(%rsp) ## 32-byte Spill vpcmpeqd %xmm6, %xmm2, %xmm3 vmovdqu %ymm3, 384(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm6 vpslld $16, %xmm6, %xmm11 vmovups %ymm2, 64(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm2, %xmm1 vmovdqu %ymm1, 256(%rsp) ## 32-byte Spill vmovq 16(%r15), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vextractf128 $1, %ymm4, %xmm7 vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, 
%ymm2, %ymm5 vpaddq %xmm0, %xmm3, %xmm2 testq %r8, %r8 je LBB2_118 ## BB#117: ## %pl_dolane.i16333 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm5, %rax vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill LBB2_118: ## %pl_loopend.i16336 ## in Loop: Header=BB2_61 Depth=1 testq %rsi, %rsi vmovdqu %ymm11, 320(%rsp) ## 32-byte Spill je LBB2_120 ## BB#119: ## %pl_dolane.1.i16342 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm5, %rax vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill LBB2_120: ## %pl_loopend.1.i16345 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm0, %xmm11 testq %rdi, %rdi je LBB2_122 ## BB#121: ## %pl_dolane.2.i16350 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill LBB2_122: ## %pl_loopend.2.i16353 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm11, %xmm3, %xmm3 testq %rbp, %rbp je LBB2_124 ## BB#123: ## %pl_dolane.3.i16358 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $3, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill LBB2_124: ## %pl_loopend.3.i16361 ## in Loop: Header=BB2_61 Depth=1 vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vinsertf128 $1, %xmm3, %ymm2, %ymm2 testq %rbx, %rbx jne LBB2_126 ## BB#125: ## in Loop: Header=BB2_61 Depth=1 vmovdqa -16(%rsp), %xmm3 ## 16-byte Reload jmp LBB2_127 .p2align 4, 0x90 LBB2_126: ## %pl_dolane.4.i16366 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm2, %rax vmovdqa -16(%rsp), %xmm3 ## 16-byte Reload vpinsrw $4, (%rax), %xmm3, %xmm3 LBB2_127: ## %pl_loopend.4.i16369 ## in Loop: Header=BB2_61 Depth=1 testq %rdx, %rdx je LBB2_129 ## BB#128: ## %pl_dolane.5.i16374 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm2, %rax vpinsrw $5, (%rax), %xmm3, %xmm3 LBB2_129: ## %pl_loopend.5.i16377 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_131 ## BB#130: ## %pl_dolane.6.i16382 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm3, %xmm3 LBB2_131: ## %pl_loopend.6.i16384 ## in Loop: Header=BB2_61 Depth=1 testb %r9b, %r9b vmovups %ymm15, 192(%rsp) ## 32-byte Spill jns LBB2_133 ## BB#132: ## %pl_dolane.7.i16389 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm3, %xmm3 LBB2_133: ## %__gather64_i16.exit16390 ## in Loop: Header=BB2_61 Depth=1 vxorps %xmm15, %xmm15, %xmm15 vpunpckhwd %xmm15, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7] vmovdqa %xmm3, -16(%rsp) ## 16-byte Spill vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps LCPI2_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 768(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups %ymm2, 1056(%rsp) ## 32-byte Spill vandps %ymm10, %ymm2, %ymm2 vpaddd %xmm5, %xmm3, %xmm5 vmovdqu %ymm5, 736(%rsp) ## 32-byte Spill vpaddd LCPI2_12(%rip), %xmm3, %xmm3 vmovdqu %ymm3, 992(%rsp) ## 32-byte Spill vpcmpeqd %xmm15, %xmm2, %xmm3 vmovdqu %ymm3, 960(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm15 vpslld $16, %xmm15, %xmm1 vmovdqu %ymm1, 1088(%rsp) ## 32-byte 
Spill vmovups %ymm2, 704(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm2, %xmm1 vmovdqu %ymm1, 1024(%rsp) ## 32-byte Spill vmovq 24(%r15), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm5 vpaddq %xmm0, %xmm3, %xmm2 testq %r8, %r8 je LBB2_135 ## BB#134: ## %pl_dolane.i16257 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm5, %rax vmovdqa 48(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 48(%rsp) ## 16-byte Spill LBB2_135: ## %pl_loopend.i16260 ## in Loop: Header=BB2_61 Depth=1 testq %rsi, %rsi je LBB2_137 ## BB#136: ## %pl_dolane.1.i16266 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm5, %rax vmovdqa 48(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 48(%rsp) ## 16-byte Spill LBB2_137: ## %pl_loopend.1.i16269 ## in Loop: Header=BB2_61 Depth=1 testq %rdi, %rdi je LBB2_139 ## BB#138: ## %pl_dolane.2.i16274 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vmovdqa 48(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 48(%rsp) ## 16-byte Spill LBB2_139: ## %pl_loopend.2.i16277 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm11, %xmm3, %xmm3 testq %rbp, %rbp je LBB2_141 ## BB#140: ## %pl_dolane.3.i16282 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa 48(%rsp), %xmm1 ## 16-byte Reload vpinsrw $3, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 48(%rsp) ## 16-byte Spill LBB2_141: ## %pl_loopend.3.i16285 ## in Loop: Header=BB2_61 Depth=1 vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vinsertf128 $1, %xmm3, %ymm2, %ymm2 testq %rbx, %rbx jne LBB2_143 ## BB#142: ## in Loop: Header=BB2_61 Depth=1 vmovdqa 48(%rsp), %xmm3 ## 16-byte Reload jmp LBB2_144 .p2align 4, 0x90 LBB2_143: ## %pl_dolane.4.i16290 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm2, %rax vmovdqa 48(%rsp), %xmm3 ## 16-byte Reload vpinsrw $4, (%rax), %xmm3, %xmm3 LBB2_144: ## %pl_loopend.4.i16293 ## in Loop: Header=BB2_61 Depth=1 testq %rdx, %rdx je LBB2_146 ## BB#145: ## %pl_dolane.5.i16298 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm2, %rax vpinsrw $5, (%rax), %xmm3, %xmm3 LBB2_146: ## %pl_loopend.5.i16301 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_148 ## BB#147: ## %pl_dolane.6.i16306 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm3, %xmm3 LBB2_148: ## %pl_loopend.6.i16308 ## in Loop: Header=BB2_61 Depth=1 testb %r9b, %r9b vmovups %ymm6, 128(%rsp) ## 32-byte Spill vmovups %ymm15, 1120(%rsp) ## 32-byte Spill vmovdqa %xmm12, %xmm15 jns LBB2_150 ## BB#149: ## %pl_dolane.7.i16313 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm3, %xmm3 LBB2_150: ## %__gather64_i16.exit16314 ## in Loop: Header=BB2_61 Depth=1 vpxor %xmm6, %xmm6, %xmm6 vpunpckhwd %xmm6, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm6[4],xmm3[5],xmm6[5],xmm3[6],xmm6[6],xmm3[7],xmm6[7] vmovdqa %xmm3, 48(%rsp) ## 16-byte Spill vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps LCPI2_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 2528(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups %ymm2, 608(%rsp) ## 32-byte Spill vandps %ymm10, %ymm2, %ymm12 vpaddd %xmm5, %xmm3, %xmm2 vmovdqu %ymm2, 
2592(%rsp) ## 32-byte Spill vpaddd LCPI2_12(%rip), %xmm3, %xmm2 vmovdqu %ymm2, 2464(%rsp) ## 32-byte Spill vpcmpeqd %xmm6, %xmm12, %xmm2 vmovdqu %ymm2, 2496(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm6 vpslld $16, %xmm6, %xmm1 vmovdqu %ymm1, 1408(%rsp) ## 32-byte Spill vmovq 32(%r15), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm4 vmovups %ymm12, 864(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm12, %xmm1 vpaddq %xmm0, %xmm3, %xmm0 testq %r8, %r8 vmovdqu %ymm1, 2560(%rsp) ## 32-byte Spill je LBB2_152 ## BB#151: ## %pl_dolane.i16181 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm4, %rax vmovdqa 672(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 672(%rsp) ## 16-byte Spill LBB2_152: ## %pl_loopend.i16184 ## in Loop: Header=BB2_61 Depth=1 vmovdqa %xmm15, %xmm12 testq %rsi, %rsi je LBB2_154 ## BB#153: ## %pl_dolane.1.i16190 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm4, %rax vmovdqa 672(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 672(%rsp) ## 16-byte Spill LBB2_154: ## %pl_loopend.1.i16193 ## in Loop: Header=BB2_61 Depth=1 testq %rdi, %rdi je LBB2_156 ## BB#155: ## %pl_dolane.2.i16198 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm4, %xmm1 vmovq %xmm1, %rax vmovdqa 672(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 672(%rsp) ## 16-byte Spill LBB2_156: ## %pl_loopend.2.i16201 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm11, %xmm3, %xmm2 testq %rbp, %rbp vmovups %ymm6, 928(%rsp) ## 32-byte Spill jne LBB2_158 ## BB#157: ## in Loop: Header=BB2_61 Depth=1 vmovdqa 672(%rsp), %xmm6 ## 16-byte Reload jmp LBB2_159 .p2align 4, 0x90 LBB2_158: ## %pl_dolane.3.i16206 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm4, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa 672(%rsp), %xmm6 ## 16-byte Reload vpinsrw $3, (%rax), %xmm6, %xmm6 LBB2_159: ## %pl_loopend.3.i16209 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm2, %ymm0, %ymm0 testq %rbx, %rbx je LBB2_161 ## BB#160: ## %pl_dolane.4.i16214 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm0, %rax vpinsrw $4, (%rax), %xmm6, %xmm6 LBB2_161: ## %pl_loopend.4.i16217 ## in Loop: Header=BB2_61 Depth=1 testq %rdx, %rdx je LBB2_163 ## BB#162: ## %pl_dolane.5.i16222 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm0, %rax vpinsrw $5, (%rax), %xmm6, %xmm6 LBB2_163: ## %pl_loopend.5.i16225 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_165 ## BB#164: ## %pl_dolane.6.i16230 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm0, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm6, %xmm6 LBB2_165: ## %pl_loopend.6.i16232 ## in Loop: Header=BB2_61 Depth=1 testb %r9b, %r9b vmovdqa %xmm8, 352(%rsp) ## 16-byte Spill jns LBB2_167 ## BB#166: ## %pl_dolane.7.i16237 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm0, %xmm0 vpextrq $1, %xmm0, %rax vpinsrw $7, (%rax), %xmm6, %xmm6 LBB2_167: ## %__gather64_i16.exit16238 ## in Loop: Header=BB2_61 Depth=1 vpxor %xmm3, %xmm3, %xmm3 vpunpckhwd %xmm3, %xmm6, %xmm0 ## xmm0 = xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] vpmovzxwd %xmm6, %xmm1 ## xmm1 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero vinsertf128 $1, %xmm0, %ymm1, %ymm0 vandps LCPI2_8(%rip), %ymm0, %ymm1 vpslld $13, %xmm1, %xmm2 vextractf128 $1, %ymm1, %xmm1 vpslld $13, %xmm1, %xmm1 vmovdqa %xmm1, 2304(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm1, %ymm2, %ymm15 vandps %ymm10, %ymm15, %ymm8 vpaddd %xmm5, 
%xmm2, %xmm1 vmovdqu %ymm1, 2272(%rsp) ## 32-byte Spill vpaddd LCPI2_12(%rip), %xmm2, %xmm1 vmovdqu %ymm1, 2240(%rsp) ## 32-byte Spill vpcmpeqd %xmm3, %xmm8, %xmm1 vmovdqu %ymm1, 2208(%rsp) ## 32-byte Spill vandps %ymm13, %ymm0, %ymm0 vpslld $16, %xmm0, %xmm10 vpcmpeqd %xmm14, %xmm8, %xmm13 vpmovsxdq %xmm12, %xmm11 vpmovsxdq %xmm9, %xmm4 vpshufd $78, %xmm9, %xmm1 ## xmm1 = xmm9[2,3,0,1] vpmovsxdq %xmm1, %xmm5 vmovq 40(%r15), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm7 ## xmm7 = xmm1[0,1,0,1] vpaddq %xmm5, %xmm7, %xmm1 vpaddq %xmm4, %xmm7, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm9 vpaddq %xmm11, %xmm7, %xmm3 testq %r8, %r8 vmovdqu %ymm10, 2368(%rsp) ## 32-byte Spill jne LBB2_169 ## BB#168: ## in Loop: Header=BB2_61 Depth=1 vmovdqa 176(%rsp), %xmm10 ## 16-byte Reload jmp LBB2_170 .p2align 4, 0x90 LBB2_169: ## %pl_dolane.i16105 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm9, %rax movzbl (%rax), %eax vmovdqa 176(%rsp), %xmm10 ## 16-byte Reload vpinsrw $0, %eax, %xmm10, %xmm10 LBB2_170: ## %pl_loopend.i16108 ## in Loop: Header=BB2_61 Depth=1 vmovdqa 800(%rsp), %xmm14 ## 16-byte Reload vpshufd $78, %xmm12, %xmm1 ## xmm1 = xmm12[2,3,0,1] testq %rsi, %rsi je LBB2_172 ## BB#171: ## %pl_dolane.1.i16114 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm9, %rax movzbl (%rax), %eax vpinsrw $1, %eax, %xmm10, %xmm10 LBB2_172: ## %pl_loopend.1.i16117 ## in Loop: Header=BB2_61 Depth=1 vpmovsxdq %xmm1, %xmm1 testq %rdi, %rdi je LBB2_174 ## BB#173: ## %pl_dolane.2.i16122 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm9, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $2, %eax, %xmm10, %xmm10 LBB2_174: ## %pl_loopend.2.i16125 ## in Loop: Header=BB2_61 Depth=1 vpaddq %xmm1, %xmm7, %xmm2 testq %rbp, %rbp vmovups %ymm0, 2400(%rsp) ## 32-byte Spill je LBB2_176 ## BB#175: ## %pl_dolane.3.i16130 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm9, %xmm7 vpextrq $1, %xmm7, %rax movzbl (%rax), %eax vpinsrw $3, %eax, %xmm10, %xmm10 LBB2_176: ## %pl_loopend.3.i16133 ## in Loop: Header=BB2_61 Depth=1 vmovups -64(%rsp), %ymm7 ## 32-byte Reload vmovaps -96(%rsp), %xmm0 ## 16-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm3 testq %rbx, %rbx je LBB2_178 ## BB#177: ## %pl_dolane.4.i16138 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm3, %rax movzbl (%rax), %eax vpinsrw $4, %eax, %xmm10, %xmm10 LBB2_178: ## %pl_loopend.4.i16141 ## in Loop: Header=BB2_61 Depth=1 testq %rdx, %rdx je LBB2_180 ## BB#179: ## %pl_dolane.5.i16146 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $5, %eax, %xmm10, %xmm10 LBB2_180: ## %pl_loopend.5.i16149 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_182 ## BB#181: ## %pl_dolane.6.i16154 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm3, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $6, %eax, %xmm10, %xmm10 LBB2_182: ## %pl_loopend.6.i16156 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm0, %ymm7, %ymm7 vinsertf128 $1, %xmm1, %ymm11, %ymm0 vinsertf128 $1, %xmm5, %ymm4, %ymm4 testb %r9b, %r9b vmovups %ymm15, 2432(%rsp) ## 32-byte Spill jns LBB2_184 ## BB#183: ## %pl_dolane.7.i16161 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm3, %xmm1 vpextrq $1, %xmm1, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm10, %xmm10 LBB2_184: ## %__gather64_i8.exit16162 ## in Loop: Header=BB2_61 Depth=1 vcvtdq2ps %ymm7, %ymm9 vpand LCPI2_53(%rip), %xmm10, %xmm1 vpmovzxwd %xmm1, %xmm11 ## xmm11 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero vmovq 48(%r15), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm3 ## xmm3 = 
xmm2[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm7 vextractf128 $1, %ymm4, %xmm5 vpaddq %xmm5, %xmm3, %xmm2 vpaddq %xmm4, %xmm3, %xmm12 vinsertf128 $1, %xmm2, %ymm12, %ymm15 testq %r8, %r8 vmovdqa %xmm1, -64(%rsp) ## 16-byte Spill je LBB2_186 ## BB#185: ## %pl_dolane.i16037 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm15, %rax movzbl (%rax), %eax vpinsrw $0, %eax, %xmm14, %xmm14 LBB2_186: ## %pl_loopend.i16040 ## in Loop: Header=BB2_61 Depth=1 vmovups 416(%rsp), %ymm1 ## 32-byte Reload vaddps LCPI2_5(%rip), %ymm9, %ymm2 testq %rsi, %rsi vmovdqu %ymm13, 2336(%rsp) ## 32-byte Spill je LBB2_188 ## BB#187: ## %pl_dolane.1.i16046 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm15, %rax movzbl (%rax), %eax vpinsrw $1, %eax, %xmm14, %xmm14 LBB2_188: ## %pl_loopend.1.i16049 ## in Loop: Header=BB2_61 Depth=1 vmulps 1888(%rsp), %ymm2, %ymm9 ## 32-byte Folded Reload vsubps 1824(%rsp), %ymm1, %ymm13 ## 32-byte Folded Reload vextractf128 $1, %ymm0, %xmm12 testq %rdi, %rdi vmovdqu %ymm11, -96(%rsp) ## 32-byte Spill je LBB2_190 ## BB#189: ## %pl_dolane.2.i16054 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm15, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $2, %eax, %xmm14, %xmm14 LBB2_190: ## %pl_loopend.2.i16057 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_6(%rip), %ymm9, %ymm9 vmovups 1856(%rsp), %ymm1 ## 32-byte Reload vdivps %ymm13, %ymm1, %ymm11 vmovups (%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm1, %ymm13 vpaddq %xmm12, %xmm3, %xmm3 testq %rbp, %rbp je LBB2_192 ## BB#191: ## %pl_dolane.3.i16062 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm15, %xmm2 vpextrq $1, %xmm2, %rax movzbl (%rax), %eax vpinsrw $3, %eax, %xmm14, %xmm14 LBB2_192: ## %pl_loopend.3.i16065 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm11, %ymm9, %ymm9 vmulps %ymm11, %ymm13, %ymm13 vinsertf128 $1, %xmm3, %ymm7, %ymm3 testq %rbx, %rbx je LBB2_194 ## BB#193: ## %pl_dolane.4.i16070 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm3, %rax movzbl (%rax), %eax vpinsrw $4, %eax, %xmm14, %xmm14 LBB2_194: ## %pl_loopend.4.i16073 ## in Loop: Header=BB2_61 Depth=1 vdivps 1792(%rsp), %ymm9, %ymm1 ## 32-byte Folded Reload vmovups %ymm1, 96(%rsp) ## 32-byte Spill vdivps 1376(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload vmovups %ymm1, (%rsp) ## 32-byte Spill testq %rdx, %rdx je LBB2_196 ## BB#195: ## %pl_dolane.5.i16078 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $5, %eax, %xmm14, %xmm14 LBB2_196: ## %pl_loopend.5.i16081 ## in Loop: Header=BB2_61 Depth=1 vmovups 96(%rsp), %ymm1 ## 32-byte Reload vmulps %ymm1, %ymm1, %ymm7 vmovups (%rsp), %ymm1 ## 32-byte Reload vmulps %ymm1, %ymm1, %ymm2 testq %rcx, %rcx vmovdqa %xmm6, 672(%rsp) ## 16-byte Spill je LBB2_198 ## BB#197: ## %pl_dolane.6.i16086 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm3, %xmm6 vmovq %xmm6, %rax movzbl (%rax), %eax vpinsrw $6, %eax, %xmm14, %xmm14 LBB2_198: ## %pl_loopend.6.i16088 ## in Loop: Header=BB2_61 Depth=1 vmovdqa 1440(%rsp), %xmm6 ## 16-byte Reload vaddps %ymm2, %ymm7, %ymm7 vmulps %ymm11, %ymm11, %ymm2 testb %r9b, %r9b jns LBB2_200 ## BB#199: ## %pl_dolane.7.i16093 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm3, %xmm3 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm14, %xmm14 LBB2_200: ## %__gather64_i8.exit16094 ## in Loop: Header=BB2_61 Depth=1 vaddps %ymm7, %ymm2, %ymm9 vpand LCPI2_53(%rip), %xmm14, %xmm7 vpmovzxwd %xmm7, %xmm13 ## xmm13 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero vmovq 56(%r15), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm3 ## 
xmm3 = xmm2[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm0 vpaddq %xmm5, %xmm3, %xmm2 vpaddq %xmm4, %xmm3, %xmm4 vinsertf128 $1, %xmm2, %ymm4, %ymm5 testq %r8, %r8 je LBB2_202 ## BB#201: ## %pl_dolane.i15969 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm5, %rax movzbl (%rax), %eax vpinsrw $0, %eax, %xmm6, %xmm6 LBB2_202: ## %pl_loopend.i15972 ## in Loop: Header=BB2_61 Depth=1 vrsqrtps %ymm9, %ymm4 testq %rsi, %rsi je LBB2_204 ## BB#203: ## %pl_dolane.1.i15978 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm5, %rax movzbl (%rax), %eax vpinsrw $1, %eax, %xmm6, %xmm6 LBB2_204: ## %pl_loopend.1.i15981 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm9, %ymm4, %ymm2 testq %rdi, %rdi je LBB2_206 ## BB#205: ## %pl_dolane.2.i15986 ## in Loop: Header=BB2_61 Depth=1 vmovdqa %xmm6, %xmm1 vextractf128 $1, %ymm5, %xmm6 vmovq %xmm6, %rax vmovdqa %xmm1, %xmm6 movzbl (%rax), %eax vpinsrw $2, %eax, %xmm6, %xmm6 LBB2_206: ## %pl_loopend.2.i15989 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm2, %ymm4, %ymm9 vpaddq %xmm12, %xmm3, %xmm2 testq %rbp, %rbp je LBB2_208 ## BB#207: ## %pl_dolane.3.i15994 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm5, %xmm3 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $3, %eax, %xmm6, %xmm6 LBB2_208: ## %pl_loopend.3.i15997 ## in Loop: Header=BB2_61 Depth=1 vmovaps LCPI2_7(%rip), %ymm1 ## ymm1 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm9, %ymm1, %ymm3 vinsertf128 $1, %xmm2, %ymm0, %ymm0 testq %rbx, %rbx je LBB2_210 ## BB#209: ## %pl_dolane.4.i16002 ## in Loop: Header=BB2_61 Depth=1 vmovq %xmm0, %rax movzbl (%rax), %eax vpinsrw $4, %eax, %xmm6, %xmm6 LBB2_210: ## %pl_loopend.4.i16005 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm3, %ymm4, %ymm2 testq %rdx, %rdx je LBB2_212 ## BB#211: ## %pl_dolane.5.i16010 ## in Loop: Header=BB2_61 Depth=1 vpextrq $1, %xmm0, %rax movzbl (%rax), %eax vpinsrw $5, %eax, %xmm6, %xmm6 LBB2_212: ## %pl_loopend.5.i16013 ## in Loop: Header=BB2_61 Depth=1 vmulps LCPI2_5(%rip), %ymm2, %ymm3 testq %rcx, %rcx je LBB2_214 ## BB#213: ## %pl_dolane.6.i16018 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm0, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $6, %eax, %xmm6, %xmm6 LBB2_214: ## %pl_loopend.6.i16020 ## in Loop: Header=BB2_61 Depth=1 vmulps 96(%rsp), %ymm3, %ymm4 ## 32-byte Folded Reload vmulps (%rsp), %ymm3, %ymm5 ## 32-byte Folded Reload vmulps %ymm3, %ymm11, %ymm3 testb %r9b, %r9b vmovdqa %xmm10, 176(%rsp) ## 16-byte Spill jns LBB2_216 ## BB#215: ## %pl_dolane.7.i16025 ## in Loop: Header=BB2_61 Depth=1 vextractf128 $1, %ymm0, %xmm0 vpextrq $1, %xmm0, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm6, %xmm6 LBB2_216: ## %__gather64_i8.exit16026 ## in Loop: Header=BB2_61 Depth=1 vmovups 544(%rsp), %ymm0 ## 32-byte Reload vmovups 512(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm1, %ymm4, %ymm0, %ymm0 vmovups %ymm0, 544(%rsp) ## 32-byte Spill vmovups 448(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm1, %ymm5, %ymm0, %ymm0 vmovups %ymm0, 448(%rsp) ## 32-byte Spill vmovups 480(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm1, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 480(%rsp) ## 32-byte Spill vpand LCPI2_53(%rip), %xmm6, %xmm10 vpmovzxwd %xmm10, %xmm0 ## xmm0 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero testl %r12d, %r12d vmovdqa %xmm14, 800(%rsp) ## 16-byte Spill vmovdqa %xmm6, 1440(%rsp) ## 16-byte Spill movq %r8, 640(%rsp) ## 8-byte Spill movq %rsi, 1280(%rsp) ## 8-byte Spill movq %rdi, 1248(%rsp) ## 8-byte Spill movq %rbp, 1216(%rsp) ## 8-byte Spill movq %rbx, 
1184(%rsp) ## 8-byte Spill movq %rdx, 1152(%rsp) ## 8-byte Spill movq %rcx, 576(%rsp) ## 8-byte Spill jle LBB2_217 ## BB#267: ## %for_loop1815.lr.ph ## in Loop: Header=BB2_61 Depth=1 vmovdqu %ymm0, 2176(%rsp) ## 32-byte Spill vmovdqa LCPI2_12(%rip), %xmm15 ## xmm15 = [947912704,947912704,947912704,947912704] vmovdqa 288(%rsp), %xmm0 ## 16-byte Reload vpaddd %xmm15, %xmm0, %xmm2 vmovups 832(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm1, %ymm2 vmovdqa LCPI2_10(%rip), %xmm4 ## xmm4 = [939524096,939524096,939524096,939524096] vpaddd %xmm4, %xmm0, %xmm3 vmovups 224(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm0, %ymm3 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vextractf128 $1, %ymm0, %xmm12 vxorps %xmm0, %xmm0, %xmm0 vpcmpeqd %xmm0, %xmm12, %xmm6 vmovups 384(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vmovaps LCPI2_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05] vaddps %ymm14, %ymm2, %ymm2 vblendvps %ymm6, %ymm2, %ymm3, %ymm2 vmovdqa 768(%rsp), %xmm1 ## 16-byte Reload vpaddd %xmm15, %xmm1, %xmm3 vmovups 992(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 vpaddd %xmm4, %xmm1, %xmm6 vmovups 736(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vmovups 704(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm1 vmovdqa %xmm7, 64(%rsp) ## 16-byte Spill vpcmpeqd %xmm0, %xmm1, %xmm7 vmovdqa %ymm13, %ymm9 vmovups 960(%rsp), %ymm13 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm13, %ymm7 vaddps %ymm14, %ymm3, %ymm3 vblendvps %ymm7, %ymm3, %ymm6, %ymm6 vpcmpeqd LCPI2_15(%rip), %xmm12, %xmm3 vmovups 256(%rsp), %ymm7 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm7, %ymm3 vmovups %ymm11, 288(%rsp) ## 32-byte Spill vmovaps LCPI2_11(%rip), %ymm11 ## ymm11 = [1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192] vorps 192(%rsp), %ymm11, %ymm5 ## 32-byte Folded Reload vblendvps %ymm3, %ymm5, %ymm2, %ymm12 vpcmpeqd LCPI2_15(%rip), %xmm1, %xmm1 vmovups 1024(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps 1056(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm6, %ymm13 vmovdqa 2528(%rsp), %xmm2 ## 16-byte Reload vpaddd %xmm15, %xmm2, %xmm1 vmovups 2464(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm3, %ymm1 vpaddd %xmm4, %xmm2, %xmm2 vmovups 2592(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups 864(%rsp), %ymm3 ## 32-byte Reload vextractf128 $1, %ymm3, %xmm6 vpcmpeqd %xmm0, %xmm6, %xmm7 vmovups 2496(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm3, %ymm7 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm7, %ymm1, %ymm2, %ymm1 vmovdqa 2304(%rsp), %xmm3 ## 16-byte Reload vpaddd %xmm15, %xmm3, %xmm2 vmovups 2240(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm5, %ymm2 vpaddd %xmm4, %xmm3, %xmm7 vmovups 2272(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm3, %ymm7 vextractf128 $1, %ymm8, %xmm4 vpcmpeqd %xmm0, %xmm4, %xmm3 vmovups 2208(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm5, %ymm3 vaddps %ymm14, %ymm2, %ymm2 vmovdqa LCPI2_15(%rip), %xmm5 ## xmm5 = [260046848,260046848,260046848,260046848] vmovdqa %xmm5, %xmm14 vblendvps %ymm3, %ymm2, %ymm7, %ymm2 vpcmpeqd %xmm14, %xmm6, %xmm3 vmovups 2560(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm5, %ymm3 vorps 608(%rsp), %ymm11, %ymm6 ## 32-byte Folded Reload vblendvps %ymm3, %ymm6, %ymm1, %ymm8 vmovups 128(%rsp), %ymm1 ## 
32-byte Reload vextractf128 $1, %ymm1, %xmm3 vpslld $16, %xmm3, %xmm3 vmovups 320(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm1, %ymm3 vmovups 1120(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm6 vpslld $16, %xmm6, %xmm6 vmovups 1088(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vorps %ymm3, %ymm12, %ymm3 vorps %ymm6, %ymm13, %ymm5 vpcmpeqd %xmm14, %xmm4, %xmm4 vmovups 2336(%rsp), %ymm1 vinsertf128 $1, %xmm4, %ymm1, %ymm4 vorps 2432(%rsp), %ymm11, %ymm6 vblendvps %ymm4, %ymm6, %ymm2, %ymm2 vmulps %ymm3, %ymm3, %ymm4 vmulps %ymm5, %ymm5, %ymm6 vsubps %ymm4, %ymm3, %ymm4 vsubps %ymm6, %ymm5, %ymm6 vaddps %ymm6, %ymm4, %ymm4 vmovdqa -64(%rsp), %xmm1 ## 16-byte Reload vpunpckhwd %xmm0, %xmm1, %xmm6 ## xmm6 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] vmovups -96(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vmovdqa 64(%rsp), %xmm1 ## 16-byte Reload vpunpckhwd %xmm0, %xmm1, %xmm7 ## xmm7 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] vxorps %xmm12, %xmm12, %xmm12 vinsertf128 $1, %xmm7, %ymm9, %ymm7 vmovups 928(%rsp), %ymm0 ## 32-byte Reload vextractf128 $1, %ymm0, %xmm0 vpslld $16, %xmm0, %xmm0 vmovups 1408(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vmovups 2400(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm1 vpslld $16, %xmm1, %xmm1 vmovups 2368(%rsp), %ymm11 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm11, %ymm1 vmovaps LCPI2_16(%rip), %ymm11 ## ymm11 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00] vmovaps %ymm11, %ymm13 vmulps %ymm13, %ymm4, %ymm11 vaddps LCPI2_6(%rip), %ymm11, %ymm11 vmulps %ymm13, %ymm3, %ymm3 vmulps %ymm13, %ymm5, %ymm5 vsqrtps %ymm11, %ymm11 vmovaps LCPI2_17(%rip), %ymm13 ## ymm13 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00] vaddps %ymm13, %ymm3, %ymm3 vaddps %ymm13, %ymm5, %ymm5 vmulps %ymm11, %ymm3, %ymm3 vmovups %ymm3, 128(%rsp) ## 32-byte Spill vmulps %ymm5, %ymm11, %ymm13 vmovups 288(%rsp), %ymm11 ## 32-byte Reload vmulps LCPI2_18(%rip), %ymm4, %ymm3 vcvtdq2ps %ymm6, %ymm4 vcvtdq2ps %ymm7, %ymm5 vmovaps LCPI2_7(%rip), %ymm6 ## ymm6 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm3, %ymm6, %ymm3 vmovups %ymm3, 320(%rsp) ## 32-byte Spill vorps %ymm0, %ymm8, %ymm0 vorps %ymm1, %ymm2, %ymm14 vmovaps LCPI2_19(%rip), %ymm1 ## ymm1 = [3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03] vmovaps %ymm1, %ymm2 vmulps %ymm2, %ymm4, %ymm1 vmovups %ymm1, 192(%rsp) ## 32-byte Spill vmulps %ymm2, %ymm5, %ymm1 vmovups %ymm1, 256(%rsp) ## 32-byte Spill vpunpckhwd %xmm12, %xmm10, %xmm1 ## xmm1 = xmm10[4],xmm12[4],xmm10[5],xmm12[5],xmm10[6],xmm12[6],xmm10[7],xmm12[7] vmovups 2176(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm3, %ymm1 vcvtdq2ps %ymm1, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 64(%rsp) ## 32-byte Spill movq 64(%r15), %r14 movq 72(%r15), %rcx movq 80(%r15), %rsi movq 120(%r15), %r8 vaddps LCPI2_20(%rip), %ymm14, %ymm1 vmulps LCPI2_21(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vmovups %ymm0, 224(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill movq 2760(%rsp), %rdi vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -96(%rsp) ## 32-byte Spill vxorps %ymm15, %ymm15, %ymm15 movl %r13d, %ebp vmovups 96(%rsp), %ymm10 ## 32-byte 
Reload .p2align 4, 0x90 LBB2_268: ## %for_loop1815 ## Parent Loop BB2_61 Depth=1 ## => This Inner Loop Header: Depth=2 movslq (%rdi), %r13 vmovss (%r8,%r13,4), %xmm1 ## xmm1 = mem[0],zero,zero,zero vbroadcastss (%r14,%r13,4), %ymm0 vsubps %ymm10, %ymm0, %ymm3 vbroadcastss (%rcx,%r13,4), %ymm0 vsubps (%rsp), %ymm0, %ymm2 ## 32-byte Folded Reload vbroadcastss (%rsi,%r13,4), %ymm0 vsubps %ymm11, %ymm0, %ymm0 vmulps %ymm3, %ymm3, %ymm4 vmulps %ymm2, %ymm2, %ymm5 vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm0, %ymm0, %ymm5 vaddps %ymm5, %ymm4, %ymm4 vmulss %xmm1, %xmm1, %xmm5 vpermilps $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 cmpl $255, %r9d jne LBB2_275 ## BB#269: ## %cif_mask_all1885 ## in Loop: Header=BB2_268 Depth=2 vcmpnleps %ymm4, %ymm5, %ymm6 vmovmskps %ymm6, %eax testl %eax, %eax je LBB2_276 ## BB#270: ## %cif_mask_all1885 ## in Loop: Header=BB2_268 Depth=2 cmpl $255, %eax jne LBB2_280 ## BB#271: ## %cif_mask_all1927 ## in Loop: Header=BB2_268 Depth=2 vsqrtps %ymm4, %ymm6 vrcpps %ymm6, %ymm4 vmulps %ymm4, %ymm6, %ymm5 vmovaps LCPI2_20(%rip), %ymm7 ## ymm7 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm5, %ymm7, %ymm5 vmulps %ymm5, %ymm4, %ymm7 vmulps %ymm7, %ymm3, %ymm5 vmulps %ymm7, %ymm2, %ymm4 vmulps %ymm7, %ymm0, %ymm3 vmovups 128(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm5, %ymm9, %ymm0 vmulps %ymm4, %ymm13, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 320(%rsp), %ymm12 ## 32-byte Reload vmulps %ymm3, %ymm12, %ymm2 vaddps %ymm0, %ymm2, %ymm0 vcmpnleps LCPI2_55(%rip), %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB2_276 ## BB#272: ## %cif_mask_all1927 ## in Loop: Header=BB2_268 Depth=2 cmpl $255, %eax jne LBB2_277 ## BB#273: ## %cif_test_all1936 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm2 vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm2, %ymm1, %ymm10 vsubps 544(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm5, %ymm6, %ymm5 jmp LBB2_274 .p2align 4, 0x90 LBB2_275: ## %cif_mask_mixed1886 ## in Loop: Header=BB2_268 Depth=2 vcmpnleps %ymm4, %ymm5, %ymm5 vandps 512(%rsp), %ymm5, %ymm7 ## 32-byte Folded Reload vmovmskps %ymm7, %eax testl %eax, %eax je LBB2_276 ## BB#283: ## %safe_if_run_true2757 ## in Loop: Header=BB2_268 Depth=2 vsqrtps %ymm4, %ymm6 vrcpps %ymm6, %ymm4 vmulps %ymm4, %ymm6, %ymm5 vmovaps LCPI2_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm5, %ymm8, %ymm5 vmulps %ymm5, %ymm4, %ymm8 vmulps %ymm8, %ymm3, %ymm4 vblendvps %ymm7, %ymm4, %ymm3, %ymm5 vmulps %ymm8, %ymm2, %ymm3 vblendvps %ymm7, %ymm3, %ymm2, %ymm4 vmulps %ymm8, %ymm0, %ymm2 vblendvps %ymm7, %ymm2, %ymm0, %ymm3 vmovups 128(%rsp), %ymm9 vmulps %ymm5, %ymm9, %ymm0 vmulps %ymm4, %ymm13, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 320(%rsp), %ymm12 vmulps %ymm3, %ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 cmpl $255, %eax jne LBB2_287 ## BB#284: ## %cif_mask_all2797 ## in Loop: Header=BB2_268 Depth=2 vcmpnleps LCPI2_55(%rip), %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB2_276 ## 
BB#285: ## %cif_mask_all2797 ## in Loop: Header=BB2_268 Depth=2 cmpl $255, %eax jne LBB2_580 ## BB#286: ## %cif_test_all2806 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm2 vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm2, %ymm1, %ymm10 vsubps 544(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm6, %ymm5, %ymm5 LBB2_274: ## %cif_done1887 ## in Loop: Header=BB2_268 Depth=2 vrsqrtps %ymm5, %ymm6 vmulps %ymm6, %ymm5, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_7(%rip), %ymm7 ## ymm7 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm5, %ymm7, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_5(%rip), %ymm6 ## ymm6 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm6, %ymm7 vmulps %ymm7, %ymm5, %ymm5 vmulps %ymm5, %ymm2, %ymm2 vmulps %ymm5, %ymm4, %ymm4 vmulps %ymm5, %ymm3, %ymm3 vmulps %ymm2, %ymm9, %ymm2 vmulps %ymm4, %ymm13, %ymm4 vaddps %ymm4, %ymm2, %ymm2 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vxorps %ymm8, %ymm8, %ymm8 vmaxps %ymm8, %ymm2, %ymm4 vcmpnleps %ymm4, %ymm8, %ymm2 vcmpnltps %ymm4, %ymm8, %ymm3 vmovaps LCPI2_22(%rip), %ymm9 ## ymm9 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm3, %ymm9, %ymm4, %ymm4 vpsrad $23, %xmm4, %xmm5 vextractf128 $1, %ymm4, %xmm6 vpsrad $23, %xmm6, %xmm6 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm6, %xmm6 vpaddd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vandps LCPI2_24(%rip), %ymm4, %ymm4 vorps %ymm7, %ymm4, %ymm4 vsubps %ymm4, %ymm9, %ymm4 vmulps LCPI2_25(%rip), %ymm4, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps %ymm9, %ymm6, %ymm6 vsubps %ymm4, %ymm8, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vcvtdq2ps %ymm5, %ymm5 vmulps LCPI2_34(%rip), %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm2, LCPI2_36(%rip), %ymm1, %ymm2 vblendvps %ymm3, %ymm2, %ymm4, %ymm2 vmulps %ymm2, %ymm14, %ymm2 vmulps LCPI2_37(%rip), %ymm2, %ymm3 vroundps $9, %ymm3, %ymm3 vcvttps2dq %ymm3, %ymm4 vmulps LCPI2_38(%rip), %ymm3, %ymm5 vsubps %ymm5, %ymm2, %ymm2 vmulps LCPI2_39(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm2, %ymm2 vmulps LCPI2_40(%rip), %ymm2, %ymm3 vaddps LCPI2_41(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_42(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_43(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_44(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps 
LCPI2_45(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm9, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm2 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm4, %xmm3 vextractf128 $1, %ymm4, %xmm5 vpaddd %xmm1, %xmm5, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vpcmpgtd %xmm1, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm6, %xmm1, %xmm5 vpcmpgtd %xmm3, %xmm1, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vpslld $23, %xmm3, %xmm3 vpslld $23, %xmm6, %xmm6 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vaddps %ymm9, %ymm2, %ymm2 vmulps %ymm2, %ymm3, %ymm2 vblendvps %ymm4, LCPI2_48(%rip), %ymm2, %ymm2 vblendvps %ymm5, %ymm8, %ymm2, %ymm2 vminps %ymm9, %ymm10, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 224(%rsp), %ymm2, %ymm1 ## 32-byte Folded Reload vaddps %ymm9, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%r13,4), %ymm1 vmulps 192(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r13,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm2 vmovups %ymm2, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%r13,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm1 jmp LBB2_279 LBB2_280: ## %cif_test_mixed2318 ## in Loop: Header=BB2_268 Depth=2 vmovups %ymm15, 384(%rsp) ## 32-byte Spill vmovaps %ymm14, %ymm15 vsqrtps %ymm4, %ymm4 vrcpps %ymm4, %ymm5 vmulps %ymm5, %ymm4, %ymm7 vmovaps LCPI2_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm7, %ymm8, %ymm7 vmulps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm3, %ymm5 vblendvps %ymm6, %ymm5, %ymm3, %ymm3 vmulps %ymm7, %ymm2, %ymm5 vblendvps %ymm6, %ymm5, %ymm2, %ymm5 vmulps %ymm7, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm7 vmovaps %ymm13, %ymm12 vmovups 128(%rsp), %ymm13 ## 32-byte Reload vmulps %ymm3, %ymm13, %ymm0 vmulps %ymm5, %ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 320(%rsp), %ymm14 ## 32-byte Reload vmulps %ymm7, %ymm14, %ymm2 vaddps %ymm2, %ymm0, %ymm2 vxorps %ymm9, %ymm9, %ymm9 vcmpnleps %ymm9, %ymm2, %ymm0 vblendvps %ymm6, %ymm0, %ymm9, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB2_281 ## BB#282: ## %safe_if_run_true2619 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm6 vpermilps $0, %xmm6, %xmm6 ## xmm6 = xmm6[0,0,0,0] vinsertf128 $1, %xmm6, %ymm6, %ymm6 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm4, %ymm1, %ymm1 vdivps %ymm6, %ymm1, %ymm11 vsubps 544(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm5, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm7, %ymm5 ## 32-byte Folded Reload vmulps %ymm3, %ymm3, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm5, %ymm5, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = 
[5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm3, %ymm7 vblendvps %ymm0, %ymm7, %ymm3, %ymm3 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm0, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm5, %ymm6 vblendvps %ymm0, %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm13, %ymm3 vmulps %ymm4, %ymm12, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm5, %ymm14, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1728(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm0, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1760(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm0, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1760(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1728(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmovaps %ymm15, %ymm14 vmulps %ymm3, %ymm14, %ymm3 vmulps LCPI2_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 288(%rsp), %ymm11 ## 
32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 96(%r15), %rax vmulps 224(%rsp), %ymm3, %ymm2 ## 32-byte Folded Reload vaddps %ymm10, %ymm2, %ymm2 vmovups 96(%rsp), %ymm10 ## 32-byte Reload vmulps %ymm2, %ymm1, %ymm1 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%r13,4), %ymm2 vmulps 192(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups 384(%rsp), %ymm15 ## 32-byte Reload vaddps %ymm2, %ymm15, %ymm2 vblendvps %ymm0, %ymm2, %ymm15, %ymm15 vbroadcastss (%rdx,%r13,4), %ymm2 vmulps 256(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm2, %ymm3, %ymm2 vblendvps %ymm0, %ymm2, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%r13,4), %ymm2 vmulps 64(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups -64(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 vmovups %ymm2, -64(%rsp) ## 32-byte Spill vmovaps %ymm12, %ymm13 jmp LBB2_276 LBB2_287: ## %cif_mask_mixed2798 ## in Loop: Header=BB2_268 Depth=2 vxorps %ymm9, %ymm9, %ymm9 vcmpnleps %ymm9, %ymm0, %ymm2 vblendvps %ymm2, %ymm7, %ymm9, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB2_276 ## BB#288: ## %safe_if_run_true3055 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 544(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps 128(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vmulps %ymm4, %ymm13, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1696(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 896(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 896(%rsp) ## 32-byte Spill vsubps 
%ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1696(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm14, %ymm3 vmulps LCPI2_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 288(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 224(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmovups 96(%rsp), %ymm10 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%r13,4), %ymm1 vmulps 192(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r13,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%r13,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 vmovups %ymm1, -64(%rsp) ## 32-byte Spill jmp LBB2_276 LBB2_281: ## in Loop: Header=BB2_268 Depth=2 vmovaps %ymm12, %ymm13 vmovaps %ymm15, %ymm14 vmovups 384(%rsp), %ymm15 ## 32-byte Reload jmp LBB2_276 LBB2_277: ## %cif_test_mixed2053 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] 
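## NOTE: this mixed-mask path (cif_test_mixed2053, like cif_test_mixed2926 at
## LBB2_580 below) appears to redo the all-lanes shading sequence under a
## per-lane blend mask: broadcast the scalar loaded from (%rax,%r13,4) with
## %rax = 88(%r15), normalize via vrsqrtps refined by one Newton-Raphson step
## (t*(3 - d*t*t)*0.5), then raise the clamped dot product to a power as
## exp(y*log(x)) using the log polynomial LCPI2_25..LCPI2_33 and the exp
## polynomial LCPI2_37..LCPI2_45, blending each result back with vblendvps so
## masked-off lanes keep their previous values.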
vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 544(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm13, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1632(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1664(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1664(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1632(%rsp) ## 32-byte Spill jmp LBB2_278 LBB2_580: ## %cif_test_mixed2926 ## in Loop: Header=BB2_268 Depth=2 movq 88(%r15), %rax vsubss (%rax,%r13,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 544(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps 
LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm13, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1600(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 2112(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 2112(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1600(%rsp) ## 32-byte Spill LBB2_278: ## %cif_done1887 ## in Loop: Header=BB2_268 Depth=2 vcvtdq2ps %ymm7, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm14, %ymm3 vmulps LCPI2_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, 
%ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 288(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 224(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%r13,4), %ymm1 vmulps 192(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r13,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%r13,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 LBB2_279: ## %cif_done1887 ## in Loop: Header=BB2_268 Depth=2 vmovups %ymm1, -64(%rsp) ## 32-byte Spill vmovups 96(%rsp), %ymm10 ## 32-byte Reload LBB2_276: ## %cif_done1887 ## in Loop: Header=BB2_268 Depth=2 addq $4, %rdi decl %r12d jne LBB2_268 jmp LBB2_218 LBB2_217: ## in Loop: Header=BB2_61 Depth=1 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -96(%rsp) ## 32-byte Spill vxorps %ymm15, %ymm15, %ymm15 movl %r13d, %ebp LBB2_218: ## %for_exit1816 ## in Loop: Header=BB2_61 Depth=1 addl %ebp, %r11d vxorps %ymm0, %ymm0, %ymm0 vmaxps %ymm0, %ymm15, %ymm0 vxorps %ymm9, %ymm9, %ymm9 vmovaps LCPI2_22(%rip), %ymm11 ## ymm11 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vminps %ymm11, %ymm0, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm0 vcmpnltps %ymm2, %ymm9, %ymm1 vblendvps %ymm1, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm12 ## xmm12 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm12, %xmm4, %xmm4 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovups 1312(%rsp), %ymm5 ## 32-byte Reload vmovups 512(%rsp), %ymm15 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm5, %ymm5 vmovaps LCPI2_24(%rip), %ymm3 ## ymm3 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm3, %ymm2, %ymm2 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm7, %ymm2, %ymm2 vmovups 1344(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vmovups %ymm3, 1344(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmovaps LCPI2_25(%rip), %ymm3 ## ymm3 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00] vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_26(%rip), %ymm4 ## ymm4 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_27(%rip), %ymm4 ## ymm4 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 
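## This vmulps/vaddps ladder (constants LCPI2_25..LCPI2_33, annotated
## above as 1.749101, -2.489927, 1.984423, ...) is a Horner-rule
## evaluation of a log polynomial: the vpsrad $23 / vpaddd -126 steps
## earlier extracted the binary exponent e, vandps/vorps forced the
## mantissa m into [0.5, 1), and the polynomial runs in r = 1 - m
## (the tail coefficients 0.5000010 and 0.3332604 match the series
## ln(1-r) = -r - r^2/2 - r^3/3 - ...). A hedged C sketch of the
## recipe, coefficient names hypothetical:
##     int   e = (bits(x) >> 23) - 126;               // vpsrad, vpaddd
##     float m = from_bits((bits(x) & 0x807fffff) | 0x3f000000);
##     float r = 1.0f - m;
##     float p = C25;
##     for (int i = 26; i <= 33; ++i) p = p * r + C[i];   // Horner steps
##     float lnx = e * 0.6931472f - r * (r * p + 1.0f);   // LCPI2_34 = ln 2
## The vblendvps against LCPI2_35/LCPI2_36 below patches in -inf for
## x == 0 and NaN for x < 0, and the later multiply by LCPI2_49
## (0.4545454 ~= 1/2.2) feeding an exp chain is consistent with a
## pow(x, 1/2.2) gamma encode.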
vmovaps LCPI2_28(%rip), %ymm4 ## ymm4 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_29(%rip), %ymm4 ## ymm4 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_30(%rip), %ymm4 ## ymm4 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_31(%rip), %ymm4 ## ymm4 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_32(%rip), %ymm4 ## ymm4 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_33(%rip), %ymm4 ## ymm4 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm9, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm5, 1312(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm3 vmovaps LCPI2_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm4, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI2_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vmovaps LCPI2_36(%rip), %ymm7 ## ymm7 = [nan,nan,nan,nan,nan,nan,nan,nan] vblendvps %ymm0, %ymm7, %ymm3, %ymm0 vblendvps %ymm1, %ymm0, %ymm2, %ymm0 vmovaps LCPI2_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01] vmulps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_37(%rip), %ymm1 ## ymm1 = [1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00] vmulps %ymm1, %ymm0, %ymm1 vroundps $9, %ymm1, %ymm1 vcvttps2dq %ymm1, %ymm2 vmovaps LCPI2_38(%rip), %ymm3 ## ymm3 = [6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01] vmulps %ymm3, %ymm1, %ymm3 vsubps %ymm3, %ymm0, %ymm0 vmovaps LCPI2_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06] vmulps %ymm3, %ymm1, %ymm1 vsubps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04] vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_41(%rip), %ymm3 ## ymm3 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03] vaddps %ymm3, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_42(%rip), %ymm6 ## ymm6 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03] vaddps %ymm6, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_43(%rip), %ymm8 ## ymm8 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02] vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_44(%rip), %ymm10 ## ymm10 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01] vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, 
%ymm1 vmovaps LCPI2_45(%rip), %ymm13 ## ymm13 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01] vaddps %ymm13, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm11, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vaddps %ymm11, %ymm0, %ymm0 vmovdqa LCPI2_46(%rip), %xmm13 ## xmm13 = [127,127,127,127] vpaddd %xmm13, %xmm2, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpaddd %xmm13, %xmm3, %xmm4 vpcmpgtd %xmm13, %xmm3, %xmm3 vpcmpgtd %xmm13, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vmovdqa LCPI2_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm4, %xmm6, %xmm3 vpcmpgtd %xmm1, %xmm6, %xmm5 vinsertf128 $1, %xmm3, %ymm5, %ymm3 vpslld $23, %xmm1, %xmm1 vpslld $23, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm1, %ymm1 vmulps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm0, %ymm0 vblendvps %ymm3, %ymm9, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovups -96(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm9, %ymm1 vcmpnltps %ymm3, %ymm9, %ymm2 vblendvps %ymm2, %ymm11, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm4 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vpaddd %xmm12, %xmm5, %xmm5 vpaddd %xmm12, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovups 1472(%rsp), %ymm5 ## 32-byte Reload vblendvps %ymm15, %ymm4, %ymm5, %ymm5 vmovaps LCPI2_24(%rip), %ymm7 ## ymm7 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm7, %ymm3, %ymm3 vmovaps LCPI2_5(%rip), %ymm0 ## ymm0 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm0, %ymm3, %ymm3 vmovups 1504(%rsp), %ymm4 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm4, %ymm4 vmovups %ymm4, 1504(%rsp) ## 32-byte Spill vsubps %ymm4, %ymm11, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm4 vaddps LCPI2_26(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_27(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_28(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_29(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_30(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_31(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_32(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_33(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm11, %ymm4, %ymm4 vsubps %ymm3, %ymm9, %ymm3 vmulps %ymm4, %ymm3, %ymm3 vmovups %ymm5, 1472(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm4 vmovaps LCPI2_34(%rip), %ymm8 ## ymm8 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm8, %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm4, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps LCPI2_49(%rip), %ymm1, %ymm1 vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm14 vmulps LCPI2_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm5 vmulps LCPI2_40(%rip), %ymm5, %ymm1 vaddps LCPI2_41(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_42(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_43(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_45(%rip), %ymm1, %ymm10 vpaddd %xmm13, %xmm14, %xmm1 vpcmpgtd %xmm13, %xmm14, %xmm2 vmovdqu %ymm2, 128(%rsp) ## 
32-byte Spill vpcmpgtd %xmm1, %xmm6, %xmm2 vmovdqu %ymm2, -96(%rsp) ## 32-byte Spill vpslld $23, %xmm1, %xmm1 vmovdqu %ymm1, 320(%rsp) ## 32-byte Spill vmovups -64(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm1 vcmpnltps %ymm2, %ymm9, %ymm4 vblendvps %ymm4, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm6 vpsrad $23, %xmm6, %xmm6 vpaddd %xmm12, %xmm6, %xmm6 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vmovups 1536(%rsp), %ymm6 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm6, %ymm6 vandps %ymm7, %ymm2, %ymm2 vorps %ymm0, %ymm2, %ymm2 vmovups 1568(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm3, 1568(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmulps LCPI2_25(%rip), %ymm2, %ymm3 vaddps LCPI2_26(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_27(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_28(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_29(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_30(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_31(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_32(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_33(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm15, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm6, 1536(%rsp) ## 32-byte Spill vcvtdq2ps %ymm6, %ymm3 vmulps %ymm8, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm4, %ymm1, %ymm2, %ymm1 vmulps LCPI2_49(%rip), %ymm1, %ymm1 vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm7 vmulps LCPI2_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm4 vmulps LCPI2_40(%rip), %ymm4, %ymm1 vaddps LCPI2_41(%rip), %ymm1, %ymm2 vpaddd %xmm13, %xmm7, %xmm3 vpcmpgtd %xmm13, %xmm7, %xmm11 vmovups (%rsp), %ymm0 ## 32-byte Reload vmulps LCPI2_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI2_51(%rip), %xmm6 ## xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm6, %xmm1, %xmm1 vpshufb %xmm6, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm1 ## xmm1 = xmm0[0],xmm1[0] vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm3, %xmm0, %xmm12 vpslld $23, %xmm3, %xmm13 movslq %r11d, %rax movq 640(%rsp), %rbx ## 8-byte Reload testq %rbx, %rbx movq 2784(%rsp), %rsi je LBB2_220 ## BB#219: ## %pl_dolane.i15850 ## in Loop: Header=BB2_61 Depth=1 vpextrb $0, %xmm1, (%rsi,%rax) LBB2_220: ## %pl_loopend.i15853 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm2, %ymm4, %ymm3 vmulps %ymm10, %ymm5, %ymm0 vextractf128 $1, %ymm14, %xmm2 movq 1280(%rsp), %rbp ## 8-byte Reload testq %rbp, %rbp movl -100(%rsp), %edx ## 4-byte Reload movl -20(%rsp), %edi ## 4-byte Reload movq 2800(%rsp), %rcx movq %rcx, %r8 vmovaps 800(%rsp), %xmm14 ## 16-byte Reload movq 1248(%rsp), %rcx ## 8-byte Reload movq 1216(%rsp), %r11 ## 8-byte Reload movq 1184(%rsp), %r14 ## 8-byte Reload movq 1152(%rsp), %r12 ## 8-byte Reload je LBB2_222 ## BB#221: ## %pl_dolane.1.i15856 ## in Loop: Header=BB2_61 Depth=1 vpextrb $2, %xmm1, 1(%rsi,%rax) LBB2_222: ## %pl_loopend.1.i15859 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_42(%rip), %ymm3, %ymm9 vaddps LCPI2_22(%rip), %ymm0, %ymm3 vpaddd 
LCPI2_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx vmovaps 176(%rsp), %xmm10 ## 16-byte Reload je LBB2_224 ## BB#223: ## %pl_dolane.2.i15862 ## in Loop: Header=BB2_61 Depth=1 vpextrb $4, %xmm1, 2(%rsi,%rax) LBB2_224: ## %pl_loopend.2.i15865 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm9, %ymm4, %ymm9 vmulps %ymm3, %ymm5, %ymm5 vpslld $23, %xmm0, %xmm3 testq %r11, %r11 je LBB2_226 ## BB#225: ## %pl_dolane.3.i15868 ## in Loop: Header=BB2_61 Depth=1 vpextrb $6, %xmm1, 3(%rsi,%rax) LBB2_226: ## %pl_loopend.3.i15871 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_43(%rip), %ymm9, %ymm9 vaddps LCPI2_22(%rip), %ymm5, %ymm5 vpcmpgtd LCPI2_46(%rip), %xmm2, %xmm2 vmovups 320(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 testq %r14, %r14 je LBB2_228 ## BB#227: ## %pl_dolane.4.i15874 ## in Loop: Header=BB2_61 Depth=1 vpextrb $8, %xmm1, 4(%rsi,%rax) LBB2_228: ## %pl_loopend.4.i15877 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm9, %ymm4, %ymm8 vmovups 128(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm6, %ymm2 vmovdqa LCPI2_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm0, %xmm6, %xmm0 vmulps %ymm5, %ymm3, %ymm3 testq %r12, %r12 je LBB2_230 ## BB#229: ## %pl_dolane.5.i15880 ## in Loop: Header=BB2_61 Depth=1 vpextrb $10, %xmm1, 5(%rsi,%rax) LBB2_230: ## %pl_loopend.5.i15883 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_44(%rip), %ymm8, %ymm5 vmovups -96(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm6, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm3, %ymm2 cmpq $0, 576(%rsp) ## 8-byte Folded Reload je LBB2_232 ## BB#231: ## %pl_dolane.6.i15886 ## in Loop: Header=BB2_61 Depth=1 vpextrb $12, %xmm1, 6(%rsi,%rax) LBB2_232: ## %pl_loopend.6.i15888 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm5, %ymm4, %ymm3 vblendvps %ymm0, %ymm15, %ymm2, %ymm2 testb %r9b, %r9b vxorps %xmm9, %xmm9, %xmm9 vmovdqa 352(%rsp), %xmm6 ## 16-byte Reload jns LBB2_234 ## BB#233: ## %pl_dolane.7.i15891 ## in Loop: Header=BB2_61 Depth=1 vpextrb $14, %xmm1, 7(%rsi,%rax) LBB2_234: ## %__masked_store_i8.exit15892 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_45(%rip), %ymm3, %ymm0 vmulps LCPI2_50(%rip), %ymm2, %ymm1 vcvttps2dq %ymm1, %ymm1 vextractf128 $1, %ymm1, %xmm2 vmovdqa LCPI2_51(%rip), %xmm3 ## xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm3, %xmm2, %xmm2 vpshufb %xmm3, %xmm1, %xmm1 vpunpcklqdq %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0] testq %rbx, %rbx je LBB2_236 ## BB#235: ## %pl_dolane.i15800 ## in Loop: Header=BB2_61 Depth=1 vpextrb $0, %xmm1, (%r10,%rax) LBB2_236: ## %pl_loopend.i15803 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm0, %ymm4, %ymm0 vextractf128 $1, %ymm7, %xmm2 testq %rbp, %rbp je LBB2_238 ## BB#237: ## %pl_dolane.1.i15806 ## in Loop: Header=BB2_61 Depth=1 vpextrb $2, %xmm1, 1(%r10,%rax) LBB2_238: ## %pl_loopend.1.i15809 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_22(%rip), %ymm0, %ymm3 vpaddd LCPI2_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx je LBB2_240 ## BB#239: ## %pl_dolane.2.i15812 ## in Loop: Header=BB2_61 Depth=1 vpextrb $4, %xmm1, 2(%r10,%rax) LBB2_240: ## %pl_loopend.2.i15815 ## in Loop: Header=BB2_61 Depth=1 vmulps %ymm3, %ymm4, %ymm3 vpslld $23, %xmm0, %xmm4 testq %r11, %r11 je LBB2_242 ## BB#241: ## %pl_dolane.3.i15818 ## in Loop: Header=BB2_61 Depth=1 vpextrb $6, %xmm1, 3(%r10,%rax) LBB2_242: ## %pl_loopend.3.i15821 ## in Loop: Header=BB2_61 Depth=1 vaddps LCPI2_22(%rip), %ymm3, %ymm3 vpcmpgtd LCPI2_46(%rip), %xmm2, %xmm2 vinsertf128 $1, %xmm4, %ymm13, %ymm4 testq %r14, %r14 je LBB2_244 ## BB#243: ## %pl_dolane.4.i15824 ## in Loop: Header=BB2_61 
Depth=1 vpextrb $8, %xmm1, 4(%r10,%rax) LBB2_244: ## %pl_loopend.4.i15827 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm2, %ymm11, %ymm2 vmovdqa LCPI2_47(%rip), %xmm5 ## xmm5 = [1,1,1,1] vpcmpgtd %xmm0, %xmm5, %xmm0 vmulps %ymm3, %ymm4, %ymm3 testq %r12, %r12 je LBB2_246 ## BB#245: ## %pl_dolane.5.i15830 ## in Loop: Header=BB2_61 Depth=1 vpextrb $10, %xmm1, 5(%r10,%rax) LBB2_246: ## %pl_loopend.5.i15833 ## in Loop: Header=BB2_61 Depth=1 vinsertf128 $1, %xmm0, %ymm12, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm3, %ymm2 cmpq $0, 576(%rsp) ## 8-byte Folded Reload je LBB2_248 ## BB#247: ## %pl_dolane.6.i15836 ## in Loop: Header=BB2_61 Depth=1 vpextrb $12, %xmm1, 6(%r10,%rax) LBB2_248: ## %pl_loopend.6.i15838 ## in Loop: Header=BB2_61 Depth=1 vblendvps %ymm0, %ymm15, %ymm2, %ymm0 testb %r9b, %r9b vmovdqu 416(%rsp), %ymm3 ## 32-byte Reload jns LBB2_250 ## BB#249: ## %pl_dolane.7.i15841 ## in Loop: Header=BB2_61 Depth=1 vpextrb $14, %xmm1, 7(%r10,%rax) LBB2_250: ## %__masked_store_i8.exit15842 ## in Loop: Header=BB2_61 Depth=1 vmulps LCPI2_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI2_51(%rip), %xmm2 ## xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm2, %xmm1, %xmm1 vpshufb %xmm2, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] testq %rbx, %rbx je LBB2_252 ## BB#251: ## %pl_dolane.i15750 ## in Loop: Header=BB2_61 Depth=1 vpextrb $0, %xmm0, (%r8,%rax) LBB2_252: ## %pl_loopend.i15753 ## in Loop: Header=BB2_61 Depth=1 testq %rbp, %rbp je LBB2_254 ## BB#253: ## %pl_dolane.1.i15756 ## in Loop: Header=BB2_61 Depth=1 vpextrb $2, %xmm0, 1(%r8,%rax) LBB2_254: ## %pl_loopend.1.i15759 ## in Loop: Header=BB2_61 Depth=1 testq %rcx, %rcx je LBB2_256 ## BB#255: ## %pl_dolane.2.i15762 ## in Loop: Header=BB2_61 Depth=1 vpextrb $4, %xmm0, 2(%r8,%rax) LBB2_256: ## %pl_loopend.2.i15765 ## in Loop: Header=BB2_61 Depth=1 testq %r11, %r11 je LBB2_258 ## BB#257: ## %pl_dolane.3.i15768 ## in Loop: Header=BB2_61 Depth=1 vpextrb $6, %xmm0, 3(%r8,%rax) LBB2_258: ## %pl_loopend.3.i15771 ## in Loop: Header=BB2_61 Depth=1 testq %r14, %r14 je LBB2_260 ## BB#259: ## %pl_dolane.4.i15774 ## in Loop: Header=BB2_61 Depth=1 vpextrb $8, %xmm0, 4(%r8,%rax) LBB2_260: ## %pl_loopend.4.i15777 ## in Loop: Header=BB2_61 Depth=1 testq %r12, %r12 je LBB2_262 ## BB#261: ## %pl_dolane.5.i15780 ## in Loop: Header=BB2_61 Depth=1 vpextrb $10, %xmm0, 5(%r8,%rax) LBB2_262: ## %pl_loopend.5.i15783 ## in Loop: Header=BB2_61 Depth=1 cmpq $0, 576(%rsp) ## 8-byte Folded Reload je LBB2_264 ## BB#263: ## %pl_dolane.6.i15786 ## in Loop: Header=BB2_61 Depth=1 vpextrb $12, %xmm0, 6(%r8,%rax) LBB2_264: ## %pl_loopend.6.i15788 ## in Loop: Header=BB2_61 Depth=1 testb %r9b, %r9b jns LBB2_266 ## BB#265: ## %pl_dolane.7.i15791 ## in Loop: Header=BB2_61 Depth=1 vpextrb $14, %xmm0, 7(%r8,%rax) LBB2_266: ## %foreach_reset139 ## in Loop: Header=BB2_61 Depth=1 movl 2048(%rsp), %r9d ## 4-byte Reload LBB2_65: ## %foreach_reset139 ## in Loop: Header=BB2_61 Depth=1 incl %edx cmpl -104(%rsp), %edx ## 4-byte Folded Reload jne LBB2_61 jmp LBB2_359 LBB2_347: ## %if_else3272 cmpl -104(%rsp), %edx ## 4-byte Folded Reload jge LBB2_359 ## BB#348: ## %for_loop3431.lr.ph vcvtsi2ssl %r9d, %xmm0, %xmm4 vmovss LCPI2_0(%rip), %xmm5 ## xmm5 = mem[0],zero,zero,zero vdivss %xmm4, %xmm5, %xmm4 vmovss %xmm4, 2112(%rsp) ## 4-byte Spill vcvtsi2ssl -112(%rsp), %xmm0, %xmm4 ## 4-byte Folded Reload vdivss %xmm4, %xmm5, %xmm4 movl -108(%rsp), %esi ## 4-byte Reload movl %esi, %eax subl %edi, %eax movl %eax, %ecx 
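## The sarl $31 / shrl $29 / addl / andl $-8 sequence that follows is a
## branch-free round-toward-zero of the signed iteration count to a
## multiple of 8 (the vector width), the usual prologue for splitting a
## foreach loop into full 8-wide bodies plus a masked tail. Equivalent
## C, as a hedged sketch (variable names hypothetical):
##     int n    = end - start;                       // subl %edi, %eax
##     int bias = (int)((unsigned)(n >> 31) >> 29);  // 7 if n < 0, else 0
##     int n8   = (n + bias) & ~7;                   // n truncated to 8k
##     int last = end - (n - n8);                    // first index of tail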
sarl $31, %ecx shrl $29, %ecx addl %eax, %ecx andl $-8, %ecx subl %ecx, %eax movl %esi, %r9d subl %eax, %r9d vmovd %esi, %xmm5 vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmovups %ymm5, 2624(%rsp) ## 32-byte Spill vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmovups %ymm4, 1408(%rsp) ## 32-byte Spill vpermilps $0, %xmm3, %xmm3 ## xmm3 = xmm3[0,0,0,0] vinsertf128 $1, %xmm3, %ymm3, %ymm3 vmovups %ymm3, 1888(%rsp) ## 32-byte Spill vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vmovups %ymm2, 1856(%rsp) ## 32-byte Spill vpermilps $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 1824(%rsp) ## 32-byte Spill vpermilps $0, %xmm1, %xmm0 ## xmm0 = xmm1[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 1792(%rsp) ## 32-byte Spill vpxor %xmm8, %xmm8, %xmm8 ## implicit-def: %YMM3 ## implicit-def: %YMM4 ## implicit-def: %YMM10 ## implicit-def: %YMM5 ## implicit-def: %XMM6 ## implicit-def: %XMM0 vmovaps %xmm0, 416(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, -16(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, 176(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, 1344(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, 1312(%rsp) ## 16-byte Spill ## implicit-def: %XMM7 ## implicit-def: %YMM0 vmovups %ymm0, 1664(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1632(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 800(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 672(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1696(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 864(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1600(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 2080(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1568(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1536(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1504(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1760(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1728(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1472(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 2016(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1984(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1952(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovdqu %ymm0, 1920(%rsp) ## 32-byte Spill movl %r9d, 380(%rsp) ## 4-byte Spill .p2align 4, 0x90 LBB2_349: ## %for_loop3431 ## =>This Loop Header: Depth=1 ## Child Loop BB2_354 Depth 2 ## Child Loop BB2_361 Depth 3 ## Child Loop BB2_559 Depth 2 vxorps %xmm0, %xmm0, %xmm0 vcvtsi2ssl %edx, %xmm0, %xmm0 vaddss LCPI2_1(%rip), %xmm0, %xmm0 vmulss 2112(%rsp), %xmm0, %xmm0 ## 4-byte Folded Reload vaddss LCPI2_2(%rip), %xmm0, %xmm0 vxorps %xmm1, %xmm1, %xmm1 vsubss %xmm0, %xmm1, %xmm1 cmpl %edi, %r9d movl %edx, -100(%rsp) ## 4-byte Spill vmovdqu %ymm3, 608(%rsp) ## 32-byte Spill vmovups %ymm4, 480(%rsp) ## 32-byte Spill vmovdqu %ymm10, 448(%rsp) ## 32-byte Spill vmovdqu %ymm5, 544(%rsp) ## 32-byte Spill vmovdqa %xmm6, 48(%rsp) ## 16-byte Spill vmovdqa %xmm7, 352(%rsp) ## 16-byte Spill vmovaps %xmm1, 896(%rsp) ## 16-byte Spill jle LBB2_350 ## BB#353: ## %foreach_full_body3443.lr.ph ## in Loop: Header=BB2_349 Depth=1 movl %edx, %r8d imull -112(%rsp), %r8d ## 4-byte Folded Reload vmovd %r8d, %xmm0 vpshufd $0, 
%xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 928(%rsp) ## 32-byte Spill vpermilps $0, %xmm1, %xmm0 ## xmm0 = xmm1[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 1376(%rsp) ## 32-byte Spill movl %edi, %r11d .p2align 4, 0x90 LBB2_354: ## %foreach_full_body3443 ## Parent Loop BB2_349 Depth=1 ## => This Loop Header: Depth=2 ## Child Loop BB2_361 Depth 3 vmovd %r11d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI2_3(%rip), %xmm0, %xmm1 vpaddd LCPI2_4(%rip), %xmm0, %xmm2 vmovdqu 928(%rsp), %ymm0 ## 32-byte Reload vmovdqu %ymm1, -96(%rsp) ## 32-byte Spill vpaddd %xmm0, %xmm1, %xmm10 vextractf128 $1, %ymm0, %xmm0 vmovdqa %xmm2, 128(%rsp) ## 16-byte Spill vpaddd %xmm0, %xmm2, %xmm1 vpslld $2, %xmm10, %xmm0 vpmovsxdq %xmm0, %xmm2 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm0 vmovq (%r15), %xmm3 ## xmm3 = mem[0],zero vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm0 vmovdqa %xmm3, 320(%rsp) ## 16-byte Spill vpaddq %xmm2, %xmm3, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovss (%rax), %xmm0 ## xmm0 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm0, %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] vinsertps $32, (%rdx), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] vinsertps $48, (%rsi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0] vmovups %ymm0, 512(%rsp) ## 32-byte Spill vmovdqa %xmm1, -64(%rsp) ## 16-byte Spill vpaddd %xmm1, %xmm1, %xmm0 vpaddd %xmm10, %xmm10, %xmm2 vpmovsxdq %xmm2, %xmm4 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm6 vpmovsxdq %xmm0, %xmm7 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm5 vmovq 8(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm2 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbx vmovq %xmm2, %rbp vpextrq $1, %xmm2, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm8, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm8[4],xmm0[5],xmm8[5],xmm0[6],xmm8[6],xmm0[7],xmm8[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vpxor %xmm12, %xmm12, %xmm12 vinsertf128 $1, %xmm1, %ymm0, %ymm11 vmovaps LCPI2_8(%rip), %ymm13 ## ymm13 = [32767,32767,32767,32767,32767,32767,32767,32767] vmovaps %ymm13, %ymm9 vandps %ymm9, %ymm11, %ymm8 vmovq 16(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm2 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## 
xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm15 vmovq 24(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm0 vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovq 32(%r15), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm3 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbx vmovq %xmm1, %rbp vpextrq $1, %xmm1, %r14 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbx), %xmm0, %xmm0 vpinsrw $6, (%rbp), %xmm0, %xmm0 vpinsrw $7, (%r14), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vpmovsxdq %xmm10, %xmm3 vpshufd $78, %xmm10, %xmm1 ## xmm1 = xmm10[2,3,0,1] vpmovsxdq %xmm1, %xmm1 vmovq 40(%r15), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm4 ## xmm4 = xmm2[0,1,0,1] vpaddq %xmm1, %xmm4, %xmm2 vmovdqa %xmm4, 96(%rsp) ## 16-byte Spill vpaddq %xmm3, %xmm4, %xmm4 vmovq %xmm4, %rax vpextrq $1, %xmm4, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vpinsrb $0, (%rax), %xmm0, %xmm2 vpinsrb $1, (%rcx), %xmm2, %xmm2 vmovq 48(%r15), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm4 vmovq %xmm4, %r14 vpextrq $1, %xmm4, %rcx vmovdqa %xmm5, 192(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm4 vmovq %xmm4, %rdi vpextrq $1, %xmm4, %rbx vmovq 56(%r15), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm3 vmovq %xmm3, %rbp vpextrq $1, %xmm3, %rax vpslld $13, %xmm8, %xmm3 vextractf128 $1, %ymm8, %xmm4 vpslld $13, %xmm4, %xmm14 vpinsrb $2, (%rdx), %xmm2, %xmm2 vpinsrb $3, (%rsi), %xmm2, %xmm6 vmovdqa %xmm5, 256(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm1 vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vinsertf128 $1, %xmm14, %ymm3, %ymm12 vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vpaddd %xmm5, %xmm3, %xmm1 vmovdqu %ymm1, 768(%rsp) ## 32-byte Spill vmovdqa LCPI2_12(%rip), %xmm13 ## xmm13 = [947912704,947912704,947912704,947912704] vmovdqa %xmm13, %xmm4 vpaddd %xmm4, %xmm3, %xmm1 vmovdqu %ymm1, 704(%rsp) ## 32-byte Spill vmovaps LCPI2_14(%rip), %ymm2 ## ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768] vmovaps %ymm2, %ymm13 vandps %ymm13, %ymm11, %ymm1 vandps %ymm9, %ymm15, %ymm2 vpslld $13, 
%xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm10 vinsertf128 $1, %xmm10, %ymm3, %ymm2 vpaddd %xmm5, %xmm3, %xmm7 vmovdqu %ymm7, 1120(%rsp) ## 32-byte Spill vpaddd %xmm4, %xmm3, %xmm3 vmovdqu %ymm3, 832(%rsp) ## 32-byte Spill vandps %ymm13, %ymm15, %ymm15 vandps %ymm9, %ymm0, %ymm3 vpslld $13, %xmm3, %xmm8 vextractf128 $1, %ymm3, %xmm3 vpslld $13, %xmm3, %xmm7 vinsertf128 $1, %xmm7, %ymm8, %ymm9 vpaddd %xmm5, %xmm8, %xmm3 vmovdqu %ymm3, 1152(%rsp) ## 32-byte Spill vpaddd %xmm4, %xmm8, %xmm3 vmovdqu %ymm3, 1184(%rsp) ## 32-byte Spill vandps %ymm13, %ymm0, %ymm4 vpmovzxbd %xmm6, %xmm0 ## xmm0 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovdqu %ymm0, 224(%rsp) ## 32-byte Spill vpinsrb $0, (%r14), %xmm0, %xmm0 vpinsrb $1, (%rcx), %xmm0, %xmm0 vpinsrb $2, (%rdi), %xmm0, %xmm0 vpinsrb $3, (%rbx), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 64(%rsp) ## 32-byte Spill vpinsrb $0, (%rbp), %xmm0, %xmm0 vpinsrb $1, (%rax), %xmm0, %xmm0 vpinsrb $2, (%rdx), %xmm0, %xmm0 vpinsrb $3, (%rsi), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 384(%rsp) ## 32-byte Spill movl 2768(%rsp), %edi testl %edi, %edi vmovaps LCPI2_9(%rip), %ymm0 ## ymm0 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848] vandps %ymm0, %ymm12, %ymm11 vpxor %xmm3, %xmm3, %xmm3 vpcmpeqd %xmm3, %xmm11, %xmm6 vmovdqu %ymm6, 736(%rsp) ## 32-byte Spill vmovups %ymm1, 576(%rsp) ## 32-byte Spill vpslld $16, %xmm1, %xmm1 vmovdqa LCPI2_15(%rip), %xmm13 ## xmm13 = [260046848,260046848,260046848,260046848] vpcmpeqd %xmm13, %xmm11, %xmm6 vmovdqu %ymm6, 1024(%rsp) ## 32-byte Spill vandps %ymm0, %ymm2, %ymm6 vpcmpeqd %xmm3, %xmm6, %xmm3 vmovdqu %ymm3, 1056(%rsp) ## 32-byte Spill vpxor %xmm8, %xmm8, %xmm8 vmovups %ymm15, 640(%rsp) ## 32-byte Spill vpslld $16, %xmm15, %xmm3 vmovdqu %ymm3, 1280(%rsp) ## 32-byte Spill vpcmpeqd %xmm13, %xmm6, %xmm3 vmovdqu %ymm3, 1088(%rsp) ## 32-byte Spill vmovaps LCPI2_8(%rip), %ymm3 ## ymm3 = [32767,32767,32767,32767,32767,32767,32767,32767] vandps (%rsp), %ymm3, %ymm15 ## 32-byte Folded Reload vpslld $13, %xmm15, %xmm3 vmovdqu %ymm3, 1216(%rsp) ## 32-byte Spill vmovups %ymm9, 1248(%rsp) ## 32-byte Spill vandps %ymm0, %ymm9, %ymm3 vpcmpeqd %xmm8, %xmm3, %xmm8 vmovups %ymm4, 288(%rsp) ## 32-byte Spill vpslld $16, %xmm4, %xmm0 vpcmpeqd %xmm13, %xmm3, %xmm4 jle LBB2_355 ## BB#360: ## %cif_mask_all3715.lr.ph ## in Loop: Header=BB2_354 Depth=2 vmovdqu %ymm4, 960(%rsp) ## 32-byte Spill vpaddd %xmm5, %xmm14, %xmm5 vmovdqu %ymm1, 992(%rsp) ## 32-byte Spill vmovups 768(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm1, %ymm1 vmovdqa LCPI2_12(%rip), %xmm13 ## xmm13 = [947912704,947912704,947912704,947912704] vpaddd %xmm13, %xmm14, %xmm5 vmovups 704(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm4, %ymm4 vextractf128 $1, %ymm11, %xmm5 vmovdqa %ymm8, %ymm13 vpxor %xmm8, %xmm8, %xmm8 vmovdqu %ymm0, 768(%rsp) ## 32-byte Spill vpcmpeqd %xmm8, %xmm5, %xmm0 vmovups 736(%rsp), %ymm8 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm8, %ymm0 vmovaps LCPI2_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05] vaddps %ymm14, %ymm4, %ymm4 vblendvps %ymm0, %ymm4, %ymm1, %ymm0 vmovdqa LCPI2_15(%rip), %xmm9 ## xmm9 = 
[260046848,260046848,260046848,260046848] vpcmpeqd %xmm9, %xmm5, %xmm1 vmovups 1024(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vmovaps LCPI2_11(%rip), %ymm11 ## ymm11 = [1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192] vorps %ymm11, %ymm12, %ymm4 vblendvps %ymm1, %ymm4, %ymm0, %ymm0 vmovups %ymm0, 704(%rsp) ## 32-byte Spill vmovdqa LCPI2_10(%rip), %xmm12 ## xmm12 = [939524096,939524096,939524096,939524096] vpaddd %xmm12, %xmm10, %xmm0 vmovups 1120(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vmovaps %ymm15, %ymm8 vmovdqa LCPI2_12(%rip), %xmm15 ## xmm15 = [947912704,947912704,947912704,947912704] vpaddd %xmm15, %xmm10, %xmm1 vmovups 832(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vextractf128 $1, %ymm6, %xmm4 vpxor %xmm6, %xmm6, %xmm6 vpcmpeqd %xmm6, %xmm4, %xmm5 vmovups 1056(%rsp), %ymm10 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm10, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm9, %xmm4, %xmm1 vmovups 1088(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vorps %ymm11, %ymm2, %ymm2 vblendvps %ymm1, %ymm2, %ymm0, %ymm10 vpaddd %xmm12, %xmm7, %xmm0 vmovdqa %xmm12, %xmm4 vmovups 1152(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpaddd %xmm15, %xmm7, %xmm1 vmovups 1184(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vextractf128 $1, %ymm3, %xmm2 vpcmpeqd %xmm6, %xmm2, %xmm3 vpxor %xmm6, %xmm6, %xmm6 vinsertf128 $1, %xmm3, %ymm13, %ymm3 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm3, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm9, %xmm2, %xmm1 vmovups 960(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps 1248(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm0, %ymm12 vextractf128 $1, %ymm8, %xmm0 vpslld $13, %xmm0, %xmm0 vmovups 1216(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa %xmm4, %xmm3 vpaddd %xmm3, %xmm1, %xmm2 vpaddd %xmm3, %xmm0, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vpaddd %xmm15, %xmm1, %xmm1 vpaddd %xmm15, %xmm0, %xmm3 vinsertf128 $1, %xmm1, %ymm3, %ymm1 vandps LCPI2_9(%rip), %ymm0, %ymm3 vextractf128 $1, %ymm3, %xmm4 vpcmpeqd %xmm6, %xmm4, %xmm5 vpcmpeqd %xmm6, %xmm3, %xmm6 vinsertf128 $1, %xmm5, %ymm6, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm2, %ymm1 vpcmpeqd %xmm9, %xmm4, %xmm2 vpcmpeqd %xmm9, %xmm3, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm11, %ymm0, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm14 vmovups -96(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, 128(%rsp), %ymm0, %ymm1 ## 16-byte Folded Reload vmovdqa -64(%rsp), %xmm0 ## 16-byte Reload vpslld $2, %xmm0, %xmm2 vpmovsxdq %xmm2, %xmm3 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm2 vmovdqa 320(%rsp), %xmm4 ## 16-byte Reload vpaddq %xmm2, %xmm4, %xmm2 vpaddq %xmm3, %xmm4, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovss (%rax), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rdx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rsi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vmovups 512(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vcvtdq2ps %ymm1, %ymm1 vmovaps LCPI2_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vaddps %ymm5, 
%ymm1, %ymm1 vmulps 1408(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmovaps LCPI2_6(%rip), %ymm3 ## ymm3 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] vmovaps %ymm3, %ymm7 vaddps %ymm7, %ymm1, %ymm1 vsubps 1856(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmovups 1888(%rsp), %ymm3 ## 32-byte Reload vdivps %ymm2, %ymm3, %ymm13 vmulps %ymm13, %ymm1, %ymm1 vdivps 1824(%rsp), %ymm1, %ymm4 ## 32-byte Folded Reload vmulps 1376(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload vdivps 1792(%rsp), %ymm1, %ymm11 ## 32-byte Folded Reload vmulps %ymm4, %ymm4, %ymm1 vmulps %ymm11, %ymm11, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmulps %ymm13, %ymm13, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vrsqrtps %ymm1, %ymm2 vmulps %ymm1, %ymm2, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovaps LCPI2_7(%rip), %ymm9 ## ymm9 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm1, %ymm9, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovups 576(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 992(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps 704(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmovups 640(%rsp), %ymm3 ## 32-byte Reload vextractf128 $1, %ymm3, %xmm3 vpslld $16, %xmm3, %xmm3 vmovups 1280(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 vorps %ymm10, %ymm3, %ymm3 vmovaps %ymm4, %ymm8 vmulps %ymm2, %ymm2, %ymm4 vsubps %ymm4, %ymm2, %ymm4 vmulps %ymm3, %ymm3, %ymm6 vsubps %ymm6, %ymm3, %ymm6 vaddps %ymm6, %ymm4, %ymm4 vmovaps LCPI2_16(%rip), %ymm6 ## ymm6 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00] vmovaps %ymm6, %ymm10 vmulps %ymm10, %ymm4, %ymm6 vaddps %ymm7, %ymm6, %ymm6 vsqrtps %ymm6, %ymm6 vmulps %ymm10, %ymm2, %ymm2 vmovaps LCPI2_17(%rip), %ymm7 ## ymm7 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00] vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm6, %ymm2, %ymm2 vmovups %ymm2, -96(%rsp) ## 32-byte Spill vmulps %ymm10, %ymm3, %ymm2 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm2, %ymm6, %ymm2 vmovups %ymm2, 128(%rsp) ## 32-byte Spill vmovups 288(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 768(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm12, %ymm2, %ymm10 vpmovsxdq %xmm0, %xmm2 vmovdqa 96(%rsp), %xmm6 ## 16-byte Reload vpaddq %xmm2, %xmm6, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vpshufd $78, %xmm0, %xmm3 ## xmm3 = xmm0[2,3,0,1] vpmovsxdq %xmm3, %xmm3 vpaddq %xmm3, %xmm6, %xmm6 vmovq %xmm6, %rdx vpextrq $1, %xmm6, %rsi vpinsrb $0, (%rax), %xmm0, %xmm6 vpinsrb $1, (%rcx), %xmm6, %xmm6 vpinsrb $2, (%rdx), %xmm6, %xmm6 vpinsrb $3, (%rsi), %xmm6, %xmm6 vpmovzxbd %xmm6, %xmm6 ## xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovups 224(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm0, %ymm6 vmovdqa 192(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm2, %xmm0, %xmm7 vmovq %xmm7, %rax vpextrq $1, %xmm7, %rcx vpaddq %xmm3, %xmm0, %xmm7 vmovq %xmm7, %rdx vpextrq $1, %xmm7, %rsi vpinsrb $0, (%rax), %xmm0, %xmm7 vpinsrb $1, (%rcx), %xmm7, %xmm7 vpinsrb $2, (%rdx), %xmm7, %xmm7 vpinsrb $3, (%rsi), %xmm7, %xmm7 vpmovzxbd %xmm7, %xmm7 ## xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero vmovups 64(%rsp), %ymm0 ## 32-byte Reload 
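## AVX1 has no gather instruction, so each 8-wide indexed byte load in
## this block is scalarized: vpmovsxdq widens the 32-bit offsets to
## pointer width, vpaddq adds a base address fetched from the parameter
## struct in %r15, vmovq/vpextrq move each lane's address into a GPR,
## and vpinsrb reassembles the loaded bytes into a vector. vpmovzxbd,
## vcvtdq2ps, and the multiply by LCPI2_19 (3.921569e-03 = 1/255) then
## turn the bytes into floats in [0, 1]. Per lane, roughly (hedged C
## sketch; names hypothetical):
##     for (int k = 0; k < 8; ++k)
##         val[k] = (float)base[idx[k]] * (1.0f / 255.0f);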
vinsertf128 $1, %xmm7, %ymm0, %ymm7 vmulps %ymm5, %ymm1, %ymm1 vmovdqa 256(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm3, %xmm0, %xmm3 vpaddq %xmm2, %xmm0, %xmm2 vmulps %ymm1, %ymm8, %ymm5 vmovups %ymm5, 192(%rsp) ## 32-byte Spill vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmulps %ymm1, %ymm11, %ymm2 vmovups %ymm2, 256(%rsp) ## 32-byte Spill vmulps %ymm1, %ymm13, %ymm1 vmovups %ymm1, 64(%rsp) ## 32-byte Spill vmulps LCPI2_18(%rip), %ymm4, %ymm1 vsubps %ymm1, %ymm9, %ymm9 vcvtdq2ps %ymm6, %ymm1 vmovaps LCPI2_19(%rip), %ymm2 ## ymm2 = [3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03] vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 224(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 288(%rsp) ## 32-byte Spill vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vpinsrb $0, (%rax), %xmm0, %xmm1 vpinsrb $1, (%rcx), %xmm1, %xmm1 vpinsrb $2, (%rdx), %xmm1, %xmm1 vpinsrb $3, (%rsi), %xmm1, %xmm1 vpmovzxbd %xmm1, %xmm1 ## xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero vmovups 384(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm0, %ymm1 vcvtdq2ps %ymm1, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 384(%rsp) ## 32-byte Spill vmovaps LCPI2_14(%rip), %ymm0 ## ymm0 = [32768,32768,32768,32768,32768,32768,32768,32768] vandps (%rsp), %ymm0, %ymm1 ## 32-byte Folded Reload vpslld $16, %xmm1, %xmm2 vextractf128 $1, %ymm1, %xmm1 vpslld $16, %xmm1, %xmm1 vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps %ymm14, %ymm1, %ymm0 movq 64(%r15), %r14 movq 72(%r15), %r12 movq 80(%r15), %r13 movq 120(%r15), %rdx vmovups %ymm10, 96(%rsp) ## 32-byte Spill vaddps LCPI2_20(%rip), %ymm10, %ymm1 vmulps LCPI2_21(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vmovups %ymm0, 576(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovaps %ymm11, %ymm0 movl %edi, %ecx movq 2760(%rsp), %rbx vxorps %ymm1, %ymm1, %ymm1 vmovups %ymm1, -64(%rsp) ## 32-byte Spill vxorps %ymm10, %ymm10, %ymm10 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm13, 640(%rsp) ## 32-byte Spill vmovups %ymm8, 320(%rsp) ## 32-byte Spill vmovups %ymm0, 512(%rsp) ## 32-byte Spill .p2align 4, 0x90 LBB2_361: ## %cif_mask_all3715 ## Parent Loop BB2_349 Depth=1 ## Parent Loop BB2_354 Depth=2 ## => This Inner Loop Header: Depth=3 movslq (%rbx), %rax vmovss (%rdx,%rax,4), %xmm4 ## xmm4 = mem[0],zero,zero,zero vbroadcastss (%r14,%rax,4), %ymm1 vsubps %ymm8, %ymm1, %ymm6 vbroadcastss (%r12,%rax,4), %ymm1 vsubps %ymm0, %ymm1, %ymm1 vbroadcastss (%r13,%rax,4), %ymm2 vsubps %ymm13, %ymm2, %ymm3 vmulps %ymm6, %ymm6, %ymm2 vmulps %ymm1, %ymm1, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulss %xmm4, %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vcmpnleps %ymm2, %ymm7, %ymm7 vmovmskps %ymm7, %esi cmpl $255, %esi je LBB2_365 ## BB#362: ## %cif_mask_all3715 ## in Loop: Header=BB2_361 Depth=3 testl %esi, %esi je LBB2_363 ## BB#370: ## %cif_test_mixed4148 ## in Loop: Header=BB2_361 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm11 vmulps %ymm11, %ymm2, %ymm12 vmovaps LCPI2_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm12, %ymm5, %ymm12 vmulps %ymm12, %ymm11, %ymm11 vmulps %ymm11, %ymm6, %ymm12 vblendvps %ymm7, %ymm12, %ymm6, %ymm12 vmulps %ymm11, %ymm1, %ymm6 vblendvps %ymm7, %ymm6, %ymm1, %ymm1 vmulps %ymm11, %ymm3, %ymm6 
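## Just above, vrcpps plus the 2.0-constant sequence (LCPI2_20) is one
## Newton-Raphson step refining the ~12-bit reciprocal estimate toward
## full single precision, and the vblendvps instructions that follow
## apply the normalized result only on lanes where the compare mask is
## set (the "mixed" path of a varying if). Hedged C sketch per lane,
## with approx_rcp standing in for vrcpps:
##     float y0 = approx_rcp(len);          // ~12-bit estimate
##     float y1 = y0 * (2.0f - len * y0);   // one Newton step
##     v = mask ? v * y1 : v;               // vblendvps keeps inactive lanes
## The vrsqrtps blocks elsewhere use the matching rsqrt refinement
## y1 = 0.5f * y0 * (3.0f - x * y0 * y0) (LCPI2_7 = 3.0, LCPI2_5 = 0.5).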
vblendvps %ymm7, %ymm6, %ymm3, %ymm3 vmovups -96(%rsp), %ymm5 ## 32-byte Reload vmulps %ymm12, %ymm5, %ymm6 vmovaps %ymm9, %ymm14 vmovups 128(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm1, %ymm9, %ymm11 vaddps %ymm11, %ymm6, %ymm6 vmulps %ymm3, %ymm14, %ymm11 vaddps %ymm11, %ymm6, %ymm11 vcmpnleps %ymm15, %ymm11, %ymm6 vblendvps %ymm7, %ymm6, %ymm15, %ymm6 vmovmskps %ymm6, %esi testl %esi, %esi je LBB2_371 ## BB#372: ## %safe_if_run_true4449 ## in Loop: Header=BB2_361 Depth=3 movq 88(%r15), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm4 vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vrsqrtps %ymm7, %ymm12 vmulps %ymm12, %ymm7, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps %ymm5, %ymm0 vmovaps LCPI2_7(%rip), %ymm5 ## ymm5 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI2_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vxorps %ymm13, %ymm13, %ymm13 vmovaps %ymm5, %ymm15 vmulps %ymm15, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm12 vblendvps %ymm6, %ymm12, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm12 vblendvps %ymm6, %ymm12, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm7 vblendvps %ymm6, %ymm7, %ymm3, %ymm3 vmulps %ymm2, %ymm0, %ymm2 vmulps %ymm1, %ymm9, %ymm1 vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm14, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmaxps %ymm13, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm13, %ymm1 vcmpnltps %ymm3, %ymm13, %ymm2 vmovups 640(%rsp), %ymm13 ## 32-byte Reload vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm5, %xmm5 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vmovups 1984(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm6, %ymm5, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm3, %ymm3 vorps %ymm15, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups 2016(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm6, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 2016(%rsp) ## 32-byte Spill vsubps %ymm0, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm5 vaddps LCPI2_26(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_27(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_28(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_29(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_30(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_31(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_32(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI2_33(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps %ymm8, %ymm5, %ymm5 vsubps %ymm3, %ymm15, %ymm3 vmulps %ymm5, %ymm3, %ymm3 vmovups %ymm7, 1984(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm5 vmulps LCPI2_34(%rip), %ymm5, %ymm5 vaddps 
%ymm3, %ymm5, %ymm3 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm5 vsubps %ymm5, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps LCPI2_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm5 vpaddd %xmm0, %xmm5, %xmm7 vpcmpgtd %xmm0, %xmm5, %xmm5 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm5, %ymm3, %ymm3 vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm5 vpcmpgtd %xmm2, %xmm0, %xmm12 vinsertf128 $1, %xmm5, %ymm12, %ymm5 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1 vblendvps %ymm5, %ymm15, %ymm1, %ymm1 vminps %ymm8, %ymm4, %ymm2 vmulps %ymm2, %ymm11, %ymm2 movq 96(%r15), %rsi vmulps 576(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmovups 320(%rsp), %ymm8 ## 32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 104(%r15), %rdi movq 112(%r15), %rbp vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm2 vblendvps %ymm6, %ymm2, %ymm10, %ymm10 vbroadcastss (%rdi,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups (%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm1 vblendvps %ymm6, %ymm1, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovups 512(%rsp), %ymm0 ## 32-byte Reload vmovaps %ymm14, %ymm9 jmp LBB2_363 .p2align 4, 0x90 LBB2_365: ## %cif_mask_all3757 ## in Loop: Header=BB2_361 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm7 vmulps %ymm7, %ymm2, %ymm11 vmovaps LCPI2_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm11, %ymm5, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm6, %ymm12 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -96(%rsp), %ymm12, %ymm6 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm1, %ymm7 ## 32-byte Folded Reload vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm9, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vcmpnleps %ymm15, %ymm6, %ymm11 vmovmskps %ymm11, %esi testl %esi, %esi je LBB2_363 ## BB#366: ## %cif_mask_all3757 ## in Loop: Header=BB2_361 Depth=3 cmpl $255, %esi jne LBB2_369 ## BB#367: ## %cif_test_all3766 ## in Loop: Header=BB2_361 Depth=3 movq 88(%r15), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, 
%xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm15 vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm11 vaddps %ymm11, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm11 vaddps %ymm7, %ymm11, %ymm7 vrsqrtps %ymm7, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI2_7(%rip), %ymm4 ## ymm4 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm4, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI2_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm4, %ymm5 vmulps %ymm5, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -96(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm9, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vxorps %ymm12, %ymm12, %ymm12 vmaxps %ymm12, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm12, %ymm1 vcmpnltps %ymm3, %ymm12, %ymm2 vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm4, %xmm4 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vandps LCPI2_24(%rip), %ymm3, %ymm3 vorps %ymm5, %ymm3, %ymm3 vsubps %ymm3, %ymm8, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm7 vaddps LCPI2_26(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_27(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_28(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_29(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_30(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_31(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_32(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI2_33(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps %ymm8, %ymm7, %ymm7 vsubps %ymm3, %ymm12, %ymm3 vmulps %ymm7, %ymm3, %ymm3 vcvtdq2ps %ymm4, %ymm4 vmulps LCPI2_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI2_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI2_40(%rip), %ymm1, %ymm2 vaddps LCPI2_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI2_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm0, %xmm4, %xmm7 vpcmpgtd %xmm0, %xmm4, %xmm4 vpcmpgtd 
    vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127]
    vpaddd %xmm0, %xmm3, %xmm2
    vextractf128 $1, %ymm3, %xmm4
    vpaddd %xmm0, %xmm4, %xmm7
    vpcmpgtd %xmm0, %xmm4, %xmm4
    vpcmpgtd %xmm0, %xmm3, %xmm3
    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1]
    vpcmpgtd %xmm7, %xmm0, %xmm4
    vpcmpgtd %xmm2, %xmm0, %xmm11
    vinsertf128 $1, %xmm4, %ymm11, %ymm4
    vpslld $23, %xmm2, %xmm2
    vpslld $23, %xmm7, %xmm7
    vinsertf128 $1, %xmm7, %ymm2, %ymm2
    vaddps %ymm8, %ymm1, %ymm1
    vmulps %ymm1, %ymm2, %ymm1
    vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1
    vblendvps %ymm4, %ymm12, %ymm1, %ymm1
    vminps %ymm8, %ymm15, %ymm2
    vxorps %ymm15, %ymm15, %ymm15
    vmulps %ymm2, %ymm6, %ymm2
    movq 96(%r15), %rsi
    vmulps 576(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload
    vaddps %ymm8, %ymm1, %ymm1
    vmulps %ymm1, %ymm2, %ymm1
    movq 104(%r15), %rdi
    movq 112(%r15), %rbp
    vbroadcastss (%rsi,%rax,4), %ymm2
    vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm2, %ymm10, %ymm10
    vbroadcastss (%rdi,%rax,4), %ymm2
    vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm2
    vmovups -64(%rsp), %ymm0 ## 32-byte Reload
    vaddps %ymm2, %ymm0, %ymm0
    vmovups %ymm0, -64(%rsp) ## 32-byte Spill
    vbroadcastss (%rbp,%rax,4), %ymm2
    vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm1
    vmovups (%rsp), %ymm0 ## 32-byte Reload
    vaddps %ymm1, %ymm0, %ymm0
    jmp LBB2_368
LBB2_371: ## in Loop: Header=BB2_361 Depth=3
    vmovaps %ymm14, %ymm9
    jmp LBB2_363
LBB2_369: ## %cif_test_mixed3883 ## in Loop: Header=BB2_361 Depth=3
    movq 88(%r15), %rsi
    vsubss (%rsi,%rax,4), %xmm4, %xmm7
    vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0]
    vinsertf128 $1, %xmm7, %ymm7, %ymm7
    vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0]
    vinsertf128 $1, %xmm4, %ymm4, %ymm4
    vsubps %ymm2, %ymm4, %ymm2
    vdivps %ymm7, %ymm2, %ymm15
    vsubps 192(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload
    vsubps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload
    vsubps 64(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm2, %ymm7
    vmulps %ymm1, %ymm1, %ymm12
    vaddps %ymm12, %ymm7, %ymm7
    vmulps %ymm3, %ymm3, %ymm12
    vaddps %ymm7, %ymm12, %ymm7
    vrsqrtps %ymm7, %ymm12
    vmulps %ymm12, %ymm7, %ymm7
    vmulps %ymm7, %ymm12, %ymm7
    vmovaps LCPI2_7(%rip), %ymm4 ## ymm4 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00]
    vsubps %ymm7, %ymm4, %ymm7
    vmulps %ymm7, %ymm12, %ymm7
    vmovaps LCPI2_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01]
    vmovaps %ymm4, %ymm5
    vmulps %ymm5, %ymm7, %ymm7
    vmulps %ymm7, %ymm2, %ymm12
    vblendvps %ymm11, %ymm12, %ymm2, %ymm2
    vmulps %ymm7, %ymm1, %ymm12
    vblendvps %ymm11, %ymm12, %ymm1, %ymm1
    vmulps %ymm7, %ymm3, %ymm7
    vblendvps %ymm11, %ymm7, %ymm3, %ymm3
    vmulps -96(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps 128(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload
    vaddps %ymm1, %ymm2, %ymm1
    vmulps %ymm3, %ymm9, %ymm2
    vaddps %ymm2, %ymm1, %ymm1
    vxorps %ymm13, %ymm13, %ymm13
    vmaxps %ymm13, %ymm1, %ymm3
    vcmpnleps %ymm3, %ymm13, %ymm1
    vcmpnltps %ymm3, %ymm13, %ymm2
    vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    vblendvps %ymm2, %ymm8, %ymm3, %ymm3
    vpsrad $23, %xmm3, %xmm7
    vextractf128 $1, %ymm3, %xmm4
    vpsrad $23, %xmm4, %xmm4
    vmovdqa LCPI2_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170]
    vpaddd %xmm0, %xmm4, %xmm4
    vpaddd %xmm0, %xmm7, %xmm7
    vinsertf128 $1, %xmm4, %ymm7, %ymm4
    vmovups 1920(%rsp), %ymm7 ## 32-byte Reload
    vblendvps %ymm11, %ymm4, %ymm7, %ymm7
    vandps LCPI2_24(%rip), %ymm3, %ymm3
    vorps %ymm5, %ymm3, %ymm3
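## Annotation (added; inferred): the vandps with LCPI2_24 keeps sign+mantissa and the
## vorps with 0.5 pins the exponent field, reducing the log argument to [0.5,1) so the
## LCPI2_25..LCPI2_33 polynomial that follows approximates the logarithm of the mantissa.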
    vmovups 1952(%rsp), %ymm0 ## 32-byte Reload
    vblendvps %ymm11, %ymm3, %ymm0, %ymm0
    vmovups %ymm0, 1952(%rsp) ## 32-byte Spill
    vsubps %ymm0, %ymm8, %ymm3
    vmulps LCPI2_25(%rip), %ymm3, %ymm4
    vaddps LCPI2_26(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_27(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_28(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_29(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_30(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_31(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_32(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps LCPI2_33(%rip), %ymm4, %ymm4
    vmulps %ymm4, %ymm3, %ymm4
    vaddps %ymm8, %ymm4, %ymm4
    vsubps %ymm3, %ymm13, %ymm3
    vmulps %ymm4, %ymm3, %ymm3
    vmovups %ymm7, 1920(%rsp) ## 32-byte Spill
    vcvtdq2ps %ymm7, %ymm4
    vmulps LCPI2_34(%rip), %ymm4, %ymm4
    vaddps %ymm3, %ymm4, %ymm3
    vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf]
    vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1
    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
    vmulps 96(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload
    vmulps LCPI2_37(%rip), %ymm1, %ymm2
    vroundps $9, %ymm2, %ymm2
    vcvttps2dq %ymm2, %ymm3
    vmulps LCPI2_38(%rip), %ymm2, %ymm4
    vsubps %ymm4, %ymm1, %ymm1
    vmulps LCPI2_39(%rip), %ymm2, %ymm2
    vsubps %ymm2, %ymm1, %ymm1
    vmulps LCPI2_40(%rip), %ymm1, %ymm2
    vaddps LCPI2_41(%rip), %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps LCPI2_42(%rip), %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps LCPI2_43(%rip), %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps LCPI2_44(%rip), %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps LCPI2_45(%rip), %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm8, %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm1
    vmovdqa LCPI2_46(%rip), %xmm0 ## xmm0 = [127,127,127,127]
    vpaddd %xmm0, %xmm3, %xmm2
    vextractf128 $1, %ymm3, %xmm4
    vpaddd %xmm0, %xmm4, %xmm7
    vpcmpgtd %xmm0, %xmm4, %xmm4
    vpcmpgtd %xmm0, %xmm3, %xmm3
    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1]
    vpcmpgtd %xmm7, %xmm0, %xmm4
    vpcmpgtd %xmm2, %xmm0, %xmm12
    vinsertf128 $1, %xmm4, %ymm12, %ymm4
    vpslld $23, %xmm2, %xmm2
    vpslld $23, %xmm7, %xmm7
    vinsertf128 $1, %xmm7, %ymm2, %ymm2
    vaddps %ymm8, %ymm1, %ymm1
    vmulps %ymm1, %ymm2, %ymm1
    vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1
    vblendvps %ymm4, %ymm13, %ymm1, %ymm1
    vmovups 640(%rsp), %ymm13 ## 32-byte Reload
    vminps %ymm8, %ymm15, %ymm2
    vxorps %ymm15, %ymm15, %ymm15
    vmulps %ymm2, %ymm6, %ymm2
    movq 96(%r15), %rsi
    vmulps 576(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload
    vaddps %ymm8, %ymm1, %ymm1
    vmulps %ymm1, %ymm2, %ymm1
    movq 104(%r15), %rdi
    movq 112(%r15), %rbp
    vbroadcastss (%rsi,%rax,4), %ymm2
    vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm2, %ymm10, %ymm2
    vblendvps %ymm11, %ymm2, %ymm10, %ymm10
    vbroadcastss (%rdi,%rax,4), %ymm2
    vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm2
    vmovups -64(%rsp), %ymm0 ## 32-byte Reload
    vaddps %ymm2, %ymm0, %ymm2
    vblendvps %ymm11, %ymm2, %ymm0, %ymm0
    vmovups %ymm0, -64(%rsp) ## 32-byte Spill
    vbroadcastss (%rbp,%rax,4), %ymm2
    vmulps 384(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload
    vmulps %ymm2, %ymm1, %ymm1
    vmovups (%rsp), %ymm0 ## 32-byte Reload
    vaddps %ymm1, %ymm0, %ymm1
    vblendvps %ymm11, %ymm1, %ymm0, %ymm0
LBB2_368: ## %cif_done3717 ## in Loop: Header=BB2_361 Depth=3
    vmovups %ymm0, (%rsp) ## 32-byte Spill
    vmovups 320(%rsp), %ymm8 ## 32-byte Reload
    vmovups 512(%rsp), %ymm0 ## 32-byte Reload
LBB2_363: ## %cif_done3717 ## in Loop: Header=BB2_361 Depth=3
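## Annotation (added; inferred from the loop structure): tail of the innermost loop --
## step a 4-byte index pointer and decrement the trip count, looping back to LBB2_361.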
    addq $4, %rbx
    decl %ecx
    jne LBB2_361
    jmp LBB2_364
    .p2align 4, 0x90
LBB2_355: ## in Loop: Header=BB2_354 Depth=2
    vxorps %ymm0, %ymm0, %ymm0
    vmovups %ymm0, (%rsp) ## 32-byte Spill
    vxorps %ymm0, %ymm0, %ymm0
    vmovups %ymm0, -64(%rsp) ## 32-byte Spill
    vxorps %ymm10, %ymm10, %ymm10
    vxorps %ymm15, %ymm15, %ymm15
LBB2_364: ## %for_exit3646 ## in Loop: Header=BB2_354 Depth=2
    vmaxps %ymm15, %ymm10, %ymm0
    vmovaps LCPI2_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    vminps %ymm8, %ymm0, %ymm2
    vcmpnleps %ymm2, %ymm15, %ymm0
    vcmpnltps %ymm2, %ymm15, %ymm1
    vblendvps %ymm1, %ymm8, %ymm2, %ymm2
    vpsrad $23, %xmm2, %xmm3
    vextractf128 $1, %ymm2, %xmm4
    vpsrad $23, %xmm4, %xmm4
    vmovdqa LCPI2_23(%rip), %xmm9 ## xmm9 = [4294967170,4294967170,4294967170,4294967170]
    vpaddd %xmm9, %xmm4, %xmm4
    vpaddd %xmm9, %xmm3, %xmm3
    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    vmovaps LCPI2_24(%rip), %ymm10 ## ymm10 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255]
    vandps %ymm10, %ymm2, %ymm2
    vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01]
    vorps %ymm7, %ymm2, %ymm2
    vsubps %ymm2, %ymm8, %ymm2
    vmovaps LCPI2_25(%rip), %ymm11 ## ymm11 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00]
    vmulps %ymm11, %ymm2, %ymm4
    vmovaps LCPI2_26(%rip), %ymm12 ## ymm12 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00]
    vaddps %ymm12, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_27(%rip), %ymm13 ## ymm13 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00]
    vaddps %ymm13, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_28(%rip), %ymm7 ## ymm7 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01]
    vaddps %ymm7, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_29(%rip), %ymm10 ## ymm10 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01]
    vaddps %ymm10, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_30(%rip), %ymm11 ## ymm11 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01]
    vaddps %ymm11, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_31(%rip), %ymm5 ## ymm5 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01]
    vaddps %ymm5, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_32(%rip), %ymm5 ## ymm5 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01]
    vaddps %ymm5, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vmovaps LCPI2_33(%rip), %ymm5 ## ymm5 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01]
    vaddps %ymm5, %ymm4, %ymm4
    vmulps %ymm4, %ymm2, %ymm4
    vaddps %ymm8, %ymm4, %ymm4
    vsubps %ymm2, %ymm15, %ymm2
    vmulps %ymm4, %ymm2, %ymm2
    vcvtdq2ps %ymm3, %ymm3
    vmovaps LCPI2_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01]
    vmulps %ymm4, %ymm3, %ymm3
    vaddps %ymm2, %ymm3, %ymm2
    vmovaps LCPI2_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf]
    vmovaps %ymm3, %ymm4
    vmovaps LCPI2_36(%rip), %ymm3 ## ymm3 = [nan,nan,nan,nan,nan,nan,nan,nan]
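## Annotation (added; inferred from the constants): the two vblendvps below patch the
## log's special cases (NaN for negative inputs, -Inf for zero); the result is then
## scaled by LCPI2_49 = 4.545454e-01 (~1/2.2) and fed to the exp2 sequence, i.e. a
## pow(x, 1/2.2) gamma correction of the accumulated color channel.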
    vblendvps %ymm0, %ymm3, %ymm4, %ymm0
    vblendvps %ymm1, %ymm0, %ymm2, %ymm0
    vmovaps LCPI2_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01]
    vmulps %ymm1, %ymm0, %ymm0
    vmovaps LCPI2_37(%rip), %ymm1 ## ymm1 = [1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00]
    vmulps %ymm1, %ymm0, %ymm1
    vroundps $9, %ymm1, %ymm1
    vcvttps2dq %ymm1, %ymm2
    vmovaps LCPI2_38(%rip), %ymm3 ## ymm3 = [6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01]
    vmulps %ymm3, %ymm1, %ymm3
    vsubps %ymm3, %ymm0, %ymm0
    vmovaps LCPI2_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06]
    vmulps %ymm3, %ymm1, %ymm1
    vsubps %ymm1, %ymm0, %ymm0
    vmovaps LCPI2_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04]
    vmulps %ymm1, %ymm0, %ymm1
    vmovaps LCPI2_41(%rip), %ymm14 ## ymm14 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03]
    vaddps %ymm14, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm1
    vmovaps LCPI2_42(%rip), %ymm13 ## ymm13 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03]
    vaddps %ymm13, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm1
    vmovaps LCPI2_43(%rip), %ymm12 ## ymm12 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02]
    vaddps %ymm12, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm1
    vmovaps LCPI2_44(%rip), %ymm11 ## ymm11 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01]
    vaddps %ymm11, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm1
    vmovaps LCPI2_45(%rip), %ymm6 ## ymm6 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01]
    vaddps %ymm6, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm1
    vaddps %ymm8, %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm0
    vaddps %ymm8, %ymm0, %ymm0
    vmovdqa LCPI2_46(%rip), %xmm7 ## xmm7 = [127,127,127,127]
    vpaddd %xmm7, %xmm2, %xmm1
    vextractf128 $1, %ymm2, %xmm3
    vpaddd %xmm7, %xmm3, %xmm4
    vpcmpgtd %xmm7, %xmm3, %xmm3
    vpcmpgtd %xmm7, %xmm2, %xmm2
    vinsertf128 $1, %xmm3, %ymm2, %ymm2
    vmovdqa LCPI2_47(%rip), %xmm10 ## xmm10 = [1,1,1,1]
    vpcmpgtd %xmm4, %xmm10, %xmm3
    vpcmpgtd %xmm1, %xmm10, %xmm5
    vinsertf128 $1, %xmm3, %ymm5, %ymm3
    vpslld $23, %xmm1, %xmm1
    vpslld $23, %xmm4, %xmm4
    vinsertf128 $1, %xmm4, %ymm1, %ymm1
    vmulps %ymm0, %ymm1, %ymm0
    vmovaps LCPI2_48(%rip), %ymm1 ## ymm1 = [inf,inf,inf,inf,inf,inf,inf,inf]
    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
    vblendvps %ymm3, %ymm15, %ymm0, %ymm0
    vmovups %ymm0, -96(%rsp) ## 32-byte Spill
    vmovups -64(%rsp), %ymm1 ## 32-byte Reload
    vmaxps %ymm15, %ymm1, %ymm1
    vminps %ymm8, %ymm1, %ymm3
    vcmpnleps %ymm3, %ymm15, %ymm1
    vcmpnltps %ymm3, %ymm15, %ymm2
    vblendvps %ymm2, %ymm8, %ymm3, %ymm3
    vpsrad $23, %xmm3, %xmm4
    vextractf128 $1, %ymm3, %xmm5
    vpsrad $23, %xmm5, %xmm5
    vpaddd %xmm9, %xmm5, %xmm5
    vpaddd %xmm9, %xmm4, %xmm4
    vinsertf128 $1, %xmm5, %ymm4, %ymm4
    vmovaps LCPI2_24(%rip), %ymm0 ## ymm0 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255]
    vandps %ymm0, %ymm3, %ymm3
    vorps LCPI2_5(%rip), %ymm3, %ymm3
    vsubps %ymm3, %ymm8, %ymm3
    vmulps LCPI2_25(%rip), %ymm3, %ymm5
    vaddps LCPI2_26(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_27(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_28(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_29(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_30(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_31(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_32(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps LCPI2_33(%rip), %ymm5, %ymm5
    vmulps %ymm5, %ymm3, %ymm5
    vaddps %ymm8, %ymm5, %ymm5
    vsubps %ymm3, %ymm15, %ymm3
    vmulps %ymm5, %ymm3, %ymm3
    vcvtdq2ps %ymm4, %ymm4
    vmulps LCPI2_34(%rip), %ymm4, %ymm4
    vaddps %ymm3, %ymm4, %ymm3
    vmovaps LCPI2_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf]
    vblendvps %ymm1, LCPI2_36(%rip), %ymm4, %ymm1
    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
    vmulps LCPI2_49(%rip), %ymm1, %ymm1
    vmulps LCPI2_37(%rip), %ymm1, %ymm2
    vroundps $9, %ymm2, %ymm2
    vcvttps2dq %ymm2, %ymm3
    vmulps LCPI2_38(%rip), %ymm2, %ymm4
    vsubps %ymm4, %ymm1, %ymm1
    vmulps LCPI2_39(%rip), %ymm2, %ymm2
    vsubps %ymm2, %ymm1, %ymm1
    vmulps LCPI2_40(%rip), %ymm1, %ymm2
    vaddps %ymm14, %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm13, %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm12, %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm11, %ymm2, %ymm2
    vmovaps %ymm11, %ymm12
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm6, %ymm2, %ymm2
    vmovaps %ymm6, %ymm11
    vmulps %ymm2, %ymm1, %ymm2
    vaddps %ymm8, %ymm2, %ymm2
    vmulps %ymm2, %ymm1, %ymm1
    vaddps %ymm8, %ymm1, %ymm1
    vpaddd %xmm7, %xmm3, %xmm2
    vextractf128 $1, %ymm3, %xmm4
    vpaddd %xmm7, %xmm4, %xmm5
    vpcmpgtd %xmm7, %xmm4, %xmm4
    vpcmpgtd %xmm7, %xmm3, %xmm3
    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    vpcmpgtd %xmm5, %xmm10, %xmm4
    vpcmpgtd %xmm2, %xmm10, %xmm6
    vinsertf128 $1, %xmm4, %ymm6, %ymm4
    vpslld $23, %xmm2, %xmm2
    vpslld $23, %xmm5, %xmm5
    vinsertf128 $1, %xmm5, %ymm2, %ymm2
    vmulps %ymm1, %ymm2, %ymm1
    vblendvps %ymm3, LCPI2_48(%rip), %ymm1, %ymm1
    vblendvps %ymm4, %ymm15, %ymm1, %ymm1
    vmovups (%rsp), %ymm2 ## 32-byte Reload
    vmaxps %ymm15, %ymm2, %ymm2
    vminps %ymm8, %ymm2, %ymm4
    vcmpnleps %ymm4, %ymm15, %ymm2
    vcmpnltps %ymm4, %ymm15, %ymm3
    vblendvps %ymm3, %ymm8, %ymm4, %ymm4
    vpsrad $23, %xmm4, %xmm5
    vextractf128 $1, %ymm4, %xmm6
    vpsrad $23, %xmm6, %xmm6
    vpaddd %xmm9, %xmm6, %xmm6
    vpaddd %xmm9, %xmm5, %xmm5
    vinsertf128 $1, %xmm6, %ymm5, %ymm5
    vandps %ymm0, %ymm4, %ymm4
    vorps LCPI2_5(%rip), %ymm4, %ymm4
    vsubps %ymm4, %ymm8, %ymm4
    vmulps LCPI2_25(%rip), %ymm4, %ymm6
    vaddps LCPI2_26(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_27(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_28(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_29(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_30(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_31(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_32(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps LCPI2_33(%rip), %ymm6, %ymm6
    vmulps %ymm6, %ymm4, %ymm6
    vaddps %ymm8, %ymm6, %ymm6
    vsubps %ymm4, %ymm15, %ymm4
    vmulps %ymm6, %ymm4, %ymm4
    vcvtdq2ps %ymm5, %ymm5
    vmulps LCPI2_34(%rip), %ymm5, %ymm5
    vaddps %ymm4, %ymm5, %ymm4
    vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf]
    vblendvps %ymm2, LCPI2_36(%rip), %ymm0, %ymm2
    vblendvps %ymm3, %ymm2, %ymm4, %ymm2
    vmulps LCPI2_49(%rip), %ymm2, %ymm2
    vmulps LCPI2_37(%rip), %ymm2, %ymm3
    vroundps $9, %ymm3, %ymm3
    vcvttps2dq %ymm3, %ymm4
    vmulps LCPI2_38(%rip), %ymm3, %ymm5
    vsubps %ymm5, %ymm2, %ymm2
    vmulps LCPI2_39(%rip), %ymm3, %ymm3
    vsubps %ymm3, %ymm2, %ymm2
    vmulps LCPI2_40(%rip), %ymm2, %ymm3
    vaddps %ymm14, %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm3
    vaddps %ymm13, %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm3
    vaddps LCPI2_43(%rip), %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm3
    vaddps %ymm12, %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm3
    vaddps %ymm11, %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm3
    vaddps %ymm8, %ymm3, %ymm3
    vmulps %ymm3, %ymm2, %ymm2
    vpaddd %xmm7, %xmm4, %xmm3
    vextractf128 $1, %ymm4, %xmm5
    vpaddd %xmm7, %xmm5, %xmm6
    vpcmpgtd %xmm7, %xmm5, %xmm5
    vpcmpgtd %xmm7, %xmm4, %xmm4
    vinsertf128 $1, %xmm5, %ymm4, %ymm4
    vpcmpgtd %xmm6, %xmm10, %xmm5
    vpcmpgtd %xmm3, %xmm10, %xmm7
    vinsertf128 $1, %xmm5, %ymm7, %ymm5
    vpslld $23, %xmm3, %xmm3
    vpslld $23, %xmm6, %xmm6
    vinsertf128 $1, %xmm6, %ymm3, %ymm3
    vaddps %ymm8, %ymm2, %ymm2
    vmulps %ymm2, %ymm3, %ymm2
    vblendvps %ymm4, LCPI2_48(%rip), %ymm2, %ymm2
    vblendvps %ymm5, %ymm15, %ymm2, %ymm2
    vmovaps LCPI2_50(%rip), %ymm3 ## ymm3 = [2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02]
    vmovaps %ymm3, %ymm4
    vmulps -96(%rsp), %ymm4, %ymm0 ## 32-byte Folded Reload
    vcvttps2dq %ymm0, %ymm0
    vextractf128 $1, %ymm0, %xmm3
    vmovdqa LCPI2_51(%rip), %xmm5 ## xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
    vpshufb %xmm5, %xmm3, %xmm3
    vpshufb %xmm5, %xmm0, %xmm0
    vpunpcklqdq %xmm3, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm3[0]
    leal (%r11,%r8), %eax
    cltq
    vmovdqa LCPI2_52(%rip), %xmm3 ## xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
    vpshufb %xmm3, %xmm0, %xmm0
    movq 2784(%rsp), %rcx
    vmovq %xmm0, (%rcx,%rax)
    vmulps %ymm4, %ymm1, %ymm0
    vcvttps2dq %ymm0, %ymm0
    vextractf128 $1, %ymm0, %xmm1
    vpshufb %xmm5, %xmm1, %xmm1
    vpshufb %xmm5, %xmm0, %xmm0
    vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0]
    vpshufb %xmm3, %xmm0, %xmm0
    vmovq %xmm0, (%r10,%rax)
    vmulps %ymm4, %ymm2, %ymm0
    vcvttps2dq %ymm0, %ymm0
    vextractf128 $1, %ymm0, %xmm1
    vpshufb %xmm5, %xmm1, %xmm1
    vpshufb %xmm5, %xmm0, %xmm0
    vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0]
    vpshufb %xmm3, %xmm0, %xmm0
    movq 2800(%rsp), %rcx
    vmovq %xmm0, (%rcx,%rax)
    addl $8, %r11d
    cmpl %r9d, %r11d
    vxorps %xmm8, %xmm8, %xmm8
    jl LBB2_354
    jmp LBB2_351
    .p2align 4, 0x90
LBB2_350: ## in Loop: Header=BB2_349 Depth=1
    movl %edi, %r11d
LBB2_351: ## %partial_inner_all_outer3482 ## in Loop: Header=BB2_349 Depth=1
    cmpl -108(%rsp), %r11d ## 4-byte Folded Reload
    jge LBB2_352
## BB#373: ## %partial_inner_only5090 ## in Loop: Header=BB2_349 Depth=1
    vmovd %r11d, %xmm0
    vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
    vpaddd LCPI2_3(%rip), %xmm0, %xmm8
    vpaddd LCPI2_4(%rip), %xmm0, %xmm9
    vmovups 2624(%rsp), %ymm1 ## 32-byte Reload
    vextractf128 $1, %ymm1, %xmm0
    vpcmpgtd %xmm9, %xmm0, %xmm0
    vpcmpgtd %xmm8, %xmm1, %xmm1
    vinsertf128 $1, %xmm0, %ymm1, %ymm7
    movl -100(%rsp), %eax ## 4-byte Reload
    movl %eax, %ecx
    imull -112(%rsp), %ecx ## 4-byte Folded Reload
    vmovd %ecx, %xmm0
    vpshufd $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0]
    vpaddd %xmm1, %xmm9, %xmm0
    vpaddd %xmm1, %xmm8, %xmm1
    vpslld $2, %xmm0, %xmm5
    vpslld $2, %xmm1, %xmm2
    vpmovsxdq %xmm2, %xmm4
    vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1]
    vpmovsxdq %xmm2, %xmm6
    vpmovsxdq %xmm5, %xmm2
    vmovq (%r15), %xmm3 ## xmm3 = mem[0],zero
    vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1]
    vpaddq %xmm2, %xmm3, %xmm2
    vpaddq %xmm6, %xmm3, %xmm6
    vpaddq %xmm4, %xmm3, %xmm4
    vinsertf128 $1, %xmm6, %ymm4, %ymm4
    vmovups %ymm7, 512(%rsp) ## 32-byte Spill
    vmovmskps %ymm7, %r9d
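## Annotation (added): AVX1 has no hardware gather, so the pl_dolane blocks that follow
## emulate one -- each bit of the vmovmskps lane mask in %r9 is tested and the active
## lane is loaded and inserted individually.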
    movq %r9, %r8
    andq $1, %r8
    jne LBB2_375
## BB#374: ## in Loop: Header=BB2_349 Depth=1
    movl 2768(%rsp), %r13d
    vmovups 544(%rsp), %ymm7 ## 32-byte Reload
    jmp LBB2_376
    .p2align 4, 0x90
LBB2_352: ## in Loop: Header=BB2_349 Depth=1
    movl -100(%rsp), %edx ## 4-byte Reload
    movl -20(%rsp), %edi ## 4-byte Reload
    vmovdqu 608(%rsp), %ymm3 ## 32-byte Reload
    vmovups 480(%rsp), %ymm4 ## 32-byte Reload
    vmovups 448(%rsp), %ymm10 ## 32-byte Reload
    vmovups 544(%rsp), %ymm5 ## 32-byte Reload
    vmovdqa 48(%rsp), %xmm6 ## 16-byte Reload
    vmovdqa 352(%rsp), %xmm7 ## 16-byte Reload
    jmp LBB2_358
    .p2align 4, 0x90
LBB2_375: ## %pl_dolane.i14658 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm4, %rax
    vmovss (%rax), %xmm6 ## xmm6 = mem[0],zero,zero,zero
    vmovups 544(%rsp), %ymm7 ## 32-byte Reload
    vblendps $1, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0],ymm7[1,2,3,4,5,6,7]
    movl 2768(%rsp), %r13d
LBB2_376: ## %pl_loopend.i14661 ## in Loop: Header=BB2_349 Depth=1
    vpshufd $78, %xmm5, %xmm5 ## xmm5 = xmm5[2,3,0,1]
    movq %r9, %rsi
    andq $2, %rsi
    je LBB2_378
## BB#377: ## %pl_dolane.1.i14665 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm4, %rax
    vinsertps $16, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0],mem[0],xmm7[2,3]
    vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7]
LBB2_378: ## %pl_loopend.1.i14668 ## in Loop: Header=BB2_349 Depth=1
    vmovaps LCPI2_9(%rip), %ymm10 ## ymm10 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848]
    vmovaps LCPI2_14(%rip), %ymm13 ## ymm13 = [32768,32768,32768,32768,32768,32768,32768,32768]
    vmovdqa LCPI2_15(%rip), %xmm14 ## xmm14 = [260046848,260046848,260046848,260046848]
    vpmovsxdq %xmm5, %xmm5
    movq %r9, %rdi
    andq $4, %rdi
    movl %ecx, 832(%rsp) ## 4-byte Spill
    je LBB2_380
## BB#379: ## %pl_dolane.2.i14671 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm4, %xmm6
    vmovq %xmm6, %rax
    vinsertps $32, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0,1],mem[0],xmm7[3]
    vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7]
LBB2_380: ## %pl_loopend.2.i14674 ## in Loop: Header=BB2_349 Depth=1
    vxorps %xmm11, %xmm11, %xmm11
    vpaddq %xmm5, %xmm3, %xmm3
    movq %r9, %rbp
    andq $8, %rbp
    je LBB2_382
## BB#381: ## %pl_dolane.3.i14677 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm4, %xmm4
    vpextrq $1, %xmm4, %rax
    vinsertps $48, (%rax), %xmm7, %xmm4 ## xmm4 = xmm7[0,1,2],mem[0]
    vblendps $15, %ymm4, %ymm7, %ymm7 ## ymm7 = ymm4[0,1,2,3],ymm7[4,5,6,7]
LBB2_382: ## %pl_loopend.3.i14680 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm3, %ymm2, %ymm2
    movq %r9, %rbx
    andq $16, %rbx
    je LBB2_384
## BB#383: ## %pl_dolane.4.i14683 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm2, %rax
    vextractf128 $1, %ymm7, %xmm3
    vmovss (%rax), %xmm4 ## xmm4 = mem[0],zero,zero,zero
    vblendps $1, %xmm4, %xmm3, %xmm3 ## xmm3 = xmm4[0],xmm3[1,2,3]
    vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB2_384: ## %pl_loopend.4.i14686 ## in Loop: Header=BB2_349 Depth=1
    movq %r9, %rdx
    andq $32, %rdx
    je LBB2_386
## BB#385: ## %pl_dolane.5.i14689 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm2, %rax
    vextractf128 $1, %ymm7, %xmm3
    vinsertps $16, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3]
    vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB2_386: ## %pl_loopend.5.i14692 ## in Loop: Header=BB2_349 Depth=1
    movq %r9, %rcx
    andq $64, %rcx
    vmovdqa %xmm9, -96(%rsp) ## 16-byte Spill
    je LBB2_388
## BB#387: ## %pl_dolane.6.i14695 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm3
    vmovq %xmm3, %rax
    vextractf128 $1, %ymm7, %xmm3
    vinsertps $32, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3]
    vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB2_388: ## %pl_loopend.6.i14697 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm0, %ymm1, %ymm9
    testb %r9b, %r9b
    vmovdqu %ymm8, -64(%rsp) ## 32-byte Spill
    jns LBB2_390
## BB#389: ## %pl_dolane.7.i14700 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm0
    vpextrq $1, %xmm0, %rax
    vextractf128 $1, %ymm7, %xmm0
    vinsertps $48, (%rax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0]
    vinsertf128 $1, %xmm0, %ymm7, %ymm7
LBB2_390: ## %__gather64_float.exit ## in Loop: Header=BB2_349 Depth=1
    vmovups %ymm7, 544(%rsp) ## 32-byte Spill
    vmovaps 896(%rsp), %xmm0 ## 16-byte Reload
    vpermilps $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0]
    vpaddd %xmm9, %xmm9, %xmm3
    vextractf128 $1, %ymm9, %xmm12
    vpaddd %xmm12, %xmm12, %xmm4
    vpmovsxdq %xmm4, %xmm0
    vpmovsxdq %xmm3, %xmm2
    vpshufd $78, %xmm3, %xmm3 ## xmm3 = xmm3[2,3,0,1]
    vpmovsxdq %xmm3, %xmm3
    vmovq 8(%r15), %xmm5 ## xmm5 = mem[0],zero
    vpshufd $68, %xmm5, %xmm6 ## xmm6 = xmm5[0,1,0,1]
    vpaddq %xmm0, %xmm6, %xmm5
    vpaddq %xmm3, %xmm6, %xmm7
    vpaddq %xmm2, %xmm6, %xmm8
    vinsertf128 $1, %xmm7, %ymm8, %ymm7
    testq %r8, %r8
    jne LBB2_392
## BB#391: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa 48(%rsp), %xmm8 ## 16-byte Reload
    jmp LBB2_393
    .p2align 4, 0x90
LBB2_392: ## %pl_dolane.i14584 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm7, %rax
    vmovdqa 48(%rsp), %xmm8 ## 16-byte Reload
    vpinsrw $0, (%rax), %xmm8, %xmm8
LBB2_393: ## %pl_loopend.i14587 ## in Loop: Header=BB2_349 Depth=1
    vpshufd $78, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,0,1]
    testq %rsi, %rsi
    je LBB2_395
## BB#394: ## %pl_dolane.1.i14593 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm7, %rax
    vpinsrw $1, (%rax), %xmm8, %xmm8
LBB2_395: ## %pl_loopend.1.i14596 ## in Loop: Header=BB2_349 Depth=1
    vpmovsxdq %xmm4, %xmm4
    testq %rdi, %rdi
    vmovups %ymm1, (%rsp) ## 32-byte Spill
    je LBB2_397
## BB#396: ## %pl_dolane.2.i14601 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm7, %xmm1
    vmovq %xmm1, %rax
    vpinsrw $2, (%rax), %xmm8, %xmm8
LBB2_397: ## %pl_loopend.2.i14604 ## in Loop: Header=BB2_349 Depth=1
    vpaddq %xmm4, %xmm6, %xmm6
    testq %rbp, %rbp
    je LBB2_399
## BB#398: ## %pl_dolane.3.i14609 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm7, %xmm1
    vpextrq $1, %xmm1, %rax
    vpinsrw $3, (%rax), %xmm8, %xmm8
LBB2_399: ## %pl_loopend.3.i14612 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm6, %ymm5, %ymm5
    testq %rbx, %rbx
    je LBB2_401
## BB#400: ## %pl_dolane.4.i14617 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm5, %rax
    vpinsrw $4, (%rax), %xmm8, %xmm8
LBB2_401: ## %pl_loopend.4.i14620 ## in Loop: Header=BB2_349 Depth=1
    testq %rdx, %rdx
    je LBB2_403
## BB#402: ## %pl_dolane.5.i14625 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm5, %rax
    vpinsrw $5, (%rax), %xmm8, %xmm8
LBB2_403: ## %pl_loopend.5.i14628 ## in Loop: Header=BB2_349 Depth=1
    testq %rcx, %rcx
    je LBB2_405
## BB#404: ## %pl_dolane.6.i14633 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vmovq %xmm1, %rax
    vpinsrw $6, (%rax), %xmm8, %xmm8
LBB2_405: ## %pl_loopend.6.i14635 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm4, %ymm0, %ymm0
    vinsertf128 $1, %xmm3, %ymm2, %ymm4
    testb %r9b, %r9b
    jns LBB2_407
## BB#406: ## %pl_dolane.7.i14640 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vpextrq $1, %xmm1, %rax
    vpinsrw $7, (%rax), %xmm8, %xmm8
LBB2_407: ## %__gather64_i16.exit14641 ## in Loop: Header=BB2_349 Depth=1
    vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096]
    vpunpckhwd %xmm11, %xmm8, %xmm1 ## xmm1 = xmm8[4],xmm11[4],xmm8[5],xmm11[5],xmm8[6],xmm11[6],xmm8[7],xmm11[7]
    vpmovzxwd %xmm8, %xmm2 ## xmm2 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero
    vinsertf128 $1, %xmm1, %ymm2, %ymm1
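## Annotation (added; inferred from the bit manipulation): the gathered 16-bit lanes
## appear to be half-precision floats -- vpslld $13 moves mantissa/exponent into fp32
## position, the LCPI2_10/LCPI2_12 adds rebias the exponent, the compares against 0 and
## 260046848 (0x0F800000) detect zero/denormal and Inf/NaN lanes, and vpslld $16
## repositions the sign bit.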
    vandps LCPI2_8(%rip), %ymm1, %ymm2
    vpslld $13, %xmm2, %xmm3
    vextractf128 $1, %ymm2, %xmm2
    vpslld $13, %xmm2, %xmm2
    vmovdqa %xmm2, 384(%rsp) ## 16-byte Spill
    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    vmovups %ymm2, 256(%rsp) ## 32-byte Spill
    vandps %ymm10, %ymm2, %ymm2
    vpaddd %xmm5, %xmm3, %xmm5
    vmovdqu %ymm5, 288(%rsp) ## 32-byte Spill
    vpaddd LCPI2_12(%rip), %xmm3, %xmm3
    vmovdqu %ymm3, 1088(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm11, %xmm2, %xmm3
    vmovdqu %ymm3, 1120(%rsp) ## 32-byte Spill
    vandps %ymm13, %ymm1, %ymm6
    vpslld $16, %xmm6, %xmm1
    vmovdqu %ymm1, 320(%rsp) ## 32-byte Spill
    vmovups %ymm2, 224(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm14, %xmm2, %xmm1
    vmovdqu %ymm1, 64(%rsp) ## 32-byte Spill
    vmovq 16(%r15), %xmm1 ## xmm1 = mem[0],zero
    vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1]
    vextractf128 $1, %ymm4, %xmm7
    vpaddq %xmm7, %xmm3, %xmm1
    vpaddq %xmm4, %xmm3, %xmm2
    vinsertf128 $1, %xmm1, %ymm2, %ymm5
    vpaddq %xmm0, %xmm3, %xmm2
    testq %r8, %r8
    je LBB2_409
## BB#408: ## %pl_dolane.i14508 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm5, %rax
    vmovdqa 416(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $0, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 416(%rsp) ## 16-byte Spill
LBB2_409: ## %pl_loopend.i14511 ## in Loop: Header=BB2_349 Depth=1
    testq %rsi, %rsi
    vmovups %ymm6, 128(%rsp) ## 32-byte Spill
    je LBB2_411
## BB#410: ## %pl_dolane.1.i14517 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm5, %rax
    vmovdqa 416(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $1, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 416(%rsp) ## 16-byte Spill
LBB2_411: ## %pl_loopend.1.i14520 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm0, %xmm6
    testq %rdi, %rdi
    je LBB2_413
## BB#412: ## %pl_dolane.2.i14525 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vmovq %xmm1, %rax
    vmovdqa 416(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $2, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 416(%rsp) ## 16-byte Spill
LBB2_413: ## %pl_loopend.2.i14528 ## in Loop: Header=BB2_349 Depth=1
    vpaddq %xmm6, %xmm3, %xmm3
    testq %rbp, %rbp
    je LBB2_415
## BB#414: ## %pl_dolane.3.i14533 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vpextrq $1, %xmm1, %rax
    vmovdqa 416(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $3, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 416(%rsp) ## 16-byte Spill
LBB2_415: ## %pl_loopend.3.i14536 ## in Loop: Header=BB2_349 Depth=1
    vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096]
    vinsertf128 $1, %xmm3, %ymm2, %ymm2
    testq %rbx, %rbx
    jne LBB2_417
## BB#416: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa 416(%rsp), %xmm3 ## 16-byte Reload
    jmp LBB2_418
    .p2align 4, 0x90
LBB2_417: ## %pl_dolane.4.i14541 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm2, %rax
    vmovdqa 416(%rsp), %xmm3 ## 16-byte Reload
    vpinsrw $4, (%rax), %xmm3, %xmm3
LBB2_418: ## %pl_loopend.4.i14544 ## in Loop: Header=BB2_349 Depth=1
    testq %rdx, %rdx
    je LBB2_420
## BB#419: ## %pl_dolane.5.i14549 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm2, %rax
    vpinsrw $5, (%rax), %xmm3, %xmm3
LBB2_420: ## %pl_loopend.5.i14552 ## in Loop: Header=BB2_349 Depth=1
    testq %rcx, %rcx
    je LBB2_422
## BB#421: ## %pl_dolane.6.i14557 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm1
    vmovq %xmm1, %rax
    vpinsrw $6, (%rax), %xmm3, %xmm3
LBB2_422: ## %pl_loopend.6.i14559 ## in Loop: Header=BB2_349 Depth=1
    testb %r9b, %r9b
    jns LBB2_424
## BB#423: ## %pl_dolane.7.i14564 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm1
    vpextrq $1, %xmm1, %rax
    vpinsrw $7, (%rax), %xmm3, %xmm3
LBB2_424: ## %__gather64_i16.exit14565 ## in Loop: Header=BB2_349 Depth=1
    vpunpckhwd %xmm11, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm11[4],xmm3[5],xmm11[5],xmm3[6],xmm11[6],xmm3[7],xmm11[7]
    vmovdqa %xmm3, 416(%rsp) ## 16-byte Spill
    vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
    vinsertf128 $1, %xmm1, %ymm2, %ymm1
    vandps LCPI2_8(%rip), %ymm1, %ymm2
    vpslld $13, %xmm2, %xmm3
    vextractf128 $1, %ymm2, %xmm2
    vpslld $13, %xmm2, %xmm2
    vmovdqa %xmm2, 736(%rsp) ## 16-byte Spill
    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    vmovups %ymm2, 704(%rsp) ## 32-byte Spill
    vandps %ymm10, %ymm2, %ymm2
    vpaddd %xmm5, %xmm3, %xmm5
    vmovdqu %ymm5, 992(%rsp) ## 32-byte Spill
    vpaddd LCPI2_12(%rip), %xmm3, %xmm3
    vmovdqu %ymm3, 928(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm11, %xmm2, %xmm3
    vmovdqu %ymm3, 1376(%rsp) ## 32-byte Spill
    vandps %ymm13, %ymm1, %ymm1
    vmovups %ymm1, 1056(%rsp) ## 32-byte Spill
    vpslld $16, %xmm1, %xmm1
    vmovdqu %ymm1, 768(%rsp) ## 32-byte Spill
    vmovups %ymm2, 1024(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm14, %xmm2, %xmm1
    vmovdqu %ymm1, 960(%rsp) ## 32-byte Spill
    vmovq 24(%r15), %xmm1 ## xmm1 = mem[0],zero
    vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1]
    vpaddq %xmm7, %xmm3, %xmm1
    vpaddq %xmm4, %xmm3, %xmm2
    vinsertf128 $1, %xmm1, %ymm2, %ymm5
    vpaddq %xmm0, %xmm3, %xmm2
    testq %r8, %r8
    je LBB2_426
## BB#425: ## %pl_dolane.i14432 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm5, %rax
    vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $0, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill
LBB2_426: ## %pl_loopend.i14435 ## in Loop: Header=BB2_349 Depth=1
    testq %rsi, %rsi
    je LBB2_428
## BB#427: ## %pl_dolane.1.i14441 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm5, %rax
    vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $1, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill
LBB2_428: ## %pl_loopend.1.i14444 ## in Loop: Header=BB2_349 Depth=1
    testq %rdi, %rdi
    je LBB2_430
## BB#429: ## %pl_dolane.2.i14449 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vmovq %xmm1, %rax
    vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $2, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill
LBB2_430: ## %pl_loopend.2.i14452 ## in Loop: Header=BB2_349 Depth=1
    vpaddq %xmm6, %xmm3, %xmm3
    testq %rbp, %rbp
    je LBB2_432
## BB#431: ## %pl_dolane.3.i14457 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm1
    vpextrq $1, %xmm1, %rax
    vmovdqa -16(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $3, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, -16(%rsp) ## 16-byte Spill
LBB2_432: ## %pl_loopend.3.i14460 ## in Loop: Header=BB2_349 Depth=1
    vmovdqa LCPI2_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096]
    vinsertf128 $1, %xmm3, %ymm2, %ymm2
    testq %rbx, %rbx
    jne LBB2_434
## BB#433: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa -16(%rsp), %xmm3 ## 16-byte Reload
    jmp LBB2_435
    .p2align 4, 0x90
LBB2_434: ## %pl_dolane.4.i14465 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm2, %rax
    vmovdqa -16(%rsp), %xmm3 ## 16-byte Reload
    vpinsrw $4, (%rax), %xmm3, %xmm3
LBB2_435: ## %pl_loopend.4.i14468 ## in Loop: Header=BB2_349 Depth=1
    testq %rdx, %rdx
    je LBB2_437
## BB#436: ## %pl_dolane.5.i14473 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm2, %rax
    vpinsrw $5, (%rax), %xmm3, %xmm3
LBB2_437: ## %pl_loopend.5.i14476 ## in Loop: Header=BB2_349 Depth=1
    testq %rcx, %rcx
    je LBB2_439
## BB#438: ## %pl_dolane.6.i14481 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm1
    vmovq %xmm1, %rax
    vpinsrw $6, (%rax), %xmm3, %xmm3
LBB2_439: ## %pl_loopend.6.i14483 ## in Loop: Header=BB2_349 Depth=1
    testb %r9b, %r9b
    jns LBB2_441
## BB#440: ## %pl_dolane.7.i14488 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm2, %xmm1
    vpextrq $1, %xmm1, %rax
    vpinsrw $7, (%rax), %xmm3, %xmm3
LBB2_441: ## %__gather64_i16.exit14489 ## in Loop: Header=BB2_349 Depth=1
    vpunpckhwd %xmm11, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm11[4],xmm3[5],xmm11[5],xmm3[6],xmm11[6],xmm3[7],xmm11[7]
    vmovdqa %xmm3, -16(%rsp) ## 16-byte Spill
    vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
    vinsertf128 $1, %xmm1, %ymm2, %ymm1
    vandps LCPI2_8(%rip), %ymm1, %ymm2
    vpslld $13, %xmm2, %xmm3
    vextractf128 $1, %ymm2, %xmm2
    vpslld $13, %xmm2, %xmm2
    vmovdqa %xmm2, 2464(%rsp) ## 16-byte Spill
    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    vmovups %ymm2, 2592(%rsp) ## 32-byte Spill
    vandps %ymm10, %ymm2, %ymm15
    vpaddd %xmm5, %xmm3, %xmm2
    vmovdqu %ymm2, 2528(%rsp) ## 32-byte Spill
    vpaddd LCPI2_12(%rip), %xmm3, %xmm2
    vmovdqu %ymm2, 2400(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm11, %xmm15, %xmm2
    vmovdqu %ymm2, 2432(%rsp) ## 32-byte Spill
    vandps %ymm13, %ymm1, %ymm1
    vmovups %ymm1, 896(%rsp) ## 32-byte Spill
    vpslld $16, %xmm1, %xmm1
    vmovdqu %ymm1, 1440(%rsp) ## 32-byte Spill
    vmovq 32(%r15), %xmm1 ## xmm1 = mem[0],zero
    vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1]
    vpaddq %xmm7, %xmm3, %xmm1
    vpaddq %xmm4, %xmm3, %xmm2
    vinsertf128 $1, %xmm1, %ymm2, %ymm4
    vmovups %ymm15, 2560(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm14, %xmm15, %xmm1
    vpaddq %xmm0, %xmm3, %xmm0
    testq %r8, %r8
    vmovdqu %ymm1, 2496(%rsp) ## 32-byte Spill
    je LBB2_443
## BB#442: ## %pl_dolane.i14357 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm4, %rax
    vmovdqa 176(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $0, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 176(%rsp) ## 16-byte Spill
LBB2_443: ## %pl_loopend.i14360 ## in Loop: Header=BB2_349 Depth=1
    testq %rsi, %rsi
    je LBB2_445
## BB#444: ## %pl_dolane.1.i14366 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm4, %rax
    vmovdqa 176(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $1, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 176(%rsp) ## 16-byte Spill
LBB2_445: ## %pl_loopend.1.i14369 ## in Loop: Header=BB2_349 Depth=1
    testq %rdi, %rdi
    je LBB2_447
## BB#446: ## %pl_dolane.2.i14374 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm4, %xmm1
    vmovq %xmm1, %rax
    vmovdqa 176(%rsp), %xmm1 ## 16-byte Reload
    vpinsrw $2, (%rax), %xmm1, %xmm1
    vmovdqa %xmm1, 176(%rsp) ## 16-byte Spill
LBB2_447: ## %pl_loopend.2.i14377 ## in Loop: Header=BB2_349 Depth=1
    vpaddq %xmm6, %xmm3, %xmm2
    testq %rbp, %rbp
    jne LBB2_449
## BB#448: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa 176(%rsp), %xmm6 ## 16-byte Reload
    jmp LBB2_450
    .p2align 4, 0x90
LBB2_449: ## %pl_dolane.3.i14382 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm4, %xmm1
    vpextrq $1, %xmm1, %rax
    vmovdqa 176(%rsp), %xmm6 ## 16-byte Reload
    vpinsrw $3, (%rax), %xmm6, %xmm6
LBB2_450: ## %pl_loopend.3.i14385 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    testq %rbx, %rbx
    je LBB2_452
## BB#451: ## %pl_dolane.4.i14390 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm0, %rax
    vpinsrw $4, (%rax), %xmm6, %xmm6
LBB2_452: ## %pl_loopend.4.i14393 ## in Loop: Header=BB2_349 Depth=1
    testq %rdx, %rdx
    je LBB2_454
## BB#453: ## %pl_dolane.5.i14398 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm0, %rax
    vpinsrw $5, (%rax), %xmm6, %xmm6
LBB2_454: ## %pl_loopend.5.i14401 ## in Loop: Header=BB2_349 Depth=1
    testq %rcx, %rcx
    je LBB2_456
## BB#455: ## %pl_dolane.6.i14406 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm0, %xmm1
    vmovq %xmm1, %rax
    vpinsrw $6, (%rax), %xmm6, %xmm6
LBB2_456: ## %pl_loopend.6.i14408 ## in Loop: Header=BB2_349 Depth=1
    testb %r9b, %r9b
    vmovdqa %xmm8, 48(%rsp) ## 16-byte Spill
    jns LBB2_458
## BB#457: ## %pl_dolane.7.i14413 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm0, %xmm0
    vpextrq $1, %xmm0, %rax
    vpinsrw $7, (%rax), %xmm6, %xmm6
LBB2_458: ## %__gather64_i16.exit ## in Loop: Header=BB2_349 Depth=1
    vmovups -64(%rsp), %ymm8 ## 32-byte Reload
    vpunpckhwd %xmm11, %xmm6, %xmm0 ## xmm0 = xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
    vpmovzxwd %xmm6, %xmm1 ## xmm1 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero
    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    vandps LCPI2_8(%rip), %ymm0, %ymm1
    vpslld $13, %xmm1, %xmm2
    vextractf128 $1, %ymm1, %xmm1
    vpslld $13, %xmm1, %xmm1
    vmovdqa %xmm1, 2208(%rsp) ## 16-byte Spill
    vinsertf128 $1, %xmm1, %ymm2, %ymm15
    vandps %ymm10, %ymm15, %ymm1
    vpaddd %xmm5, %xmm2, %xmm3
    vmovdqu %ymm3, 2176(%rsp) ## 32-byte Spill
    vpaddd LCPI2_12(%rip), %xmm2, %xmm2
    vmovdqu %ymm2, 2144(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm11, %xmm1, %xmm2
    vmovdqu %ymm2, 2048(%rsp) ## 32-byte Spill
    vandps %ymm13, %ymm0, %ymm0
    vpslld $16, %xmm0, %xmm11
    vmovups %ymm1, 2240(%rsp) ## 32-byte Spill
    vpcmpeqd %xmm14, %xmm1, %xmm13
    vpmovsxdq %xmm12, %xmm10
    vpmovsxdq %xmm9, %xmm4
    vpshufd $78, %xmm9, %xmm1 ## xmm1 = xmm9[2,3,0,1]
    vpmovsxdq %xmm1, %xmm5
    vmovq 40(%r15), %xmm1 ## xmm1 = mem[0],zero
    vpshufd $68, %xmm1, %xmm7 ## xmm7 = xmm1[0,1,0,1]
    vpaddq %xmm5, %xmm7, %xmm1
    vpaddq %xmm4, %xmm7, %xmm2
    vinsertf128 $1, %xmm1, %ymm2, %ymm9
    vpaddq %xmm10, %xmm7, %xmm3
    testq %r8, %r8
    vmovups %ymm0, 2336(%rsp) ## 32-byte Spill
    jne LBB2_460
## BB#459: ## in Loop: Header=BB2_349 Depth=1
    vmovups 448(%rsp), %ymm14 ## 32-byte Reload
    vmovdqa 1344(%rsp), %xmm0 ## 16-byte Reload
    jmp LBB2_461
    .p2align 4, 0x90
LBB2_460: ## %pl_dolane.i14281 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm9, %rax
    movzbl (%rax), %eax
    vmovdqa 1344(%rsp), %xmm0 ## 16-byte Reload
    vpinsrw $0, %eax, %xmm0, %xmm0
    vmovups 448(%rsp), %ymm14 ## 32-byte Reload
LBB2_461: ## %pl_loopend.i14284 ## in Loop: Header=BB2_349 Depth=1
    vpshufd $78, %xmm12, %xmm1 ## xmm1 = xmm12[2,3,0,1]
    testq %rsi, %rsi
    je LBB2_463
## BB#462: ## %pl_dolane.1.i14290 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm9, %rax
    movzbl (%rax), %eax
    vpinsrw $1, %eax, %xmm0, %xmm0
LBB2_463: ## %pl_loopend.1.i14293 ## in Loop: Header=BB2_349 Depth=1
    vpmovsxdq %xmm1, %xmm1
    testq %rdi, %rdi
    je LBB2_465
## BB#464: ## %pl_dolane.2.i14298 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm9, %xmm2
    vmovq %xmm2, %rax
    movzbl (%rax), %eax
    vpinsrw $2, %eax, %xmm0, %xmm0
LBB2_465: ## %pl_loopend.2.i14301 ## in Loop: Header=BB2_349 Depth=1
    vpaddq %xmm1, %xmm7, %xmm2
    testq %rbp, %rbp
    je LBB2_467
## BB#466: ## %pl_dolane.3.i14306 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm9, %xmm7
    vpextrq $1, %xmm7, %rax
    movzbl (%rax), %eax
    vpinsrw $3, %eax, %xmm0, %xmm0
LBB2_467: ## %pl_loopend.3.i14309 ## in Loop: Header=BB2_349 Depth=1
    vmovaps -96(%rsp), %xmm7 ## 16-byte Reload
    vinsertf128 $1, %xmm2, %ymm3, %ymm3
    testq %rbx, %rbx
    je LBB2_469
## BB#468: ## %pl_dolane.4.i14314 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm3, %rax
    movzbl (%rax), %eax
    vpinsrw $4, %eax, %xmm0, %xmm0
LBB2_469: ## %pl_loopend.4.i14317 ## in Loop: Header=BB2_349 Depth=1
    testq %rdx, %rdx
    je LBB2_471
## BB#470: ## %pl_dolane.5.i14322 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm3, %rax
    movzbl (%rax), %eax
    vpinsrw $5, %eax, %xmm0, %xmm0
LBB2_471: ## %pl_loopend.5.i14325 ## in Loop: Header=BB2_349 Depth=1
    testq %rcx, %rcx
    vmovdqu %ymm11, 2304(%rsp) ## 32-byte Spill
    je LBB2_473
## BB#472: ## %pl_dolane.6.i14330 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm3, %xmm2
    vmovq %xmm2, %rax
    movzbl (%rax), %eax
    vpinsrw $6, %eax, %xmm0, %xmm0
LBB2_473: ## %pl_loopend.6.i14332 ## in Loop: Header=BB2_349 Depth=1
    vinsertf128 $1, %xmm7, %ymm8, %ymm7
    vinsertf128 $1, %xmm1, %ymm10, %ymm11
    vinsertf128 $1, %xmm5, %ymm4, %ymm4
    testb %r9b, %r9b
    vmovups %ymm15, 2368(%rsp) ## 32-byte Spill
    jns LBB2_475
## BB#474: ## %pl_dolane.7.i14337 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm3, %xmm1
    vpextrq $1, %xmm1, %rax
    movzbl (%rax), %eax
    vpinsrw $7, %eax, %xmm0, %xmm0
LBB2_475: ## %__gather64_i8.exit14338 ## in Loop: Header=BB2_349 Depth=1
    vmovdqa 1312(%rsp), %xmm5 ## 16-byte Reload
    vcvtdq2ps %ymm7, %ymm9
    vpand LCPI2_53(%rip), %xmm0, %xmm1
    vpmovzxwd %xmm1, %xmm10 ## xmm10 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
    vmovq 48(%r15), %xmm2 ## xmm2 = mem[0],zero
    vpshufd $68, %xmm2, %xmm3 ## xmm3 = xmm2[0,1,0,1]
    vpaddq %xmm11, %xmm3, %xmm7
    vextractf128 $1, %ymm4, %xmm8
    vpaddq %xmm8, %xmm3, %xmm2
    vpaddq %xmm4, %xmm3, %xmm12
    vinsertf128 $1, %xmm2, %ymm12, %ymm15
    testq %r8, %r8
    vmovdqa %xmm1, -64(%rsp) ## 16-byte Spill
    je LBB2_477
## BB#476: ## %pl_dolane.i14213 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm15, %rax
    movzbl (%rax), %eax
    vpinsrw $0, %eax, %xmm5, %xmm5
LBB2_477: ## %pl_loopend.i14216 ## in Loop: Header=BB2_349 Depth=1
    vmovups 544(%rsp), %ymm1 ## 32-byte Reload
    vaddps LCPI2_5(%rip), %ymm9, %ymm2
    testq %rsi, %rsi
    vmovdqu %ymm13, 2272(%rsp) ## 32-byte Spill
    je LBB2_479
## BB#478: ## %pl_dolane.1.i14222 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm15, %rax
    movzbl (%rax), %eax
    vpinsrw $1, %eax, %xmm5, %xmm5
LBB2_479: ## %pl_loopend.1.i14225 ## in Loop: Header=BB2_349 Depth=1
    vmulps 1408(%rsp), %ymm2, %ymm9 ## 32-byte Folded Reload
    vsubps 1856(%rsp), %ymm1, %ymm13 ## 32-byte Folded Reload
    vextractf128 $1, %ymm11, %xmm12
    testq %rdi, %rdi
    je LBB2_481
## BB#480: ## %pl_dolane.2.i14230 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm15, %xmm2
    vmovq %xmm2, %rax
    movzbl (%rax), %eax
    vpinsrw $2, %eax, %xmm5, %xmm5
LBB2_481: ## %pl_loopend.2.i14233 ## in Loop: Header=BB2_349 Depth=1
    vaddps LCPI2_6(%rip), %ymm9, %ymm9
    vmovups 1888(%rsp), %ymm1 ## 32-byte Reload
    vdivps %ymm13, %ymm1, %ymm1
    vmovups %ymm1, 192(%rsp) ## 32-byte Spill
    vmovups (%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm1, %ymm1, %ymm13
    vpaddq %xmm12, %xmm3, %xmm3
    testq %rbp, %rbp
    je LBB2_483
## BB#482: ## %pl_dolane.3.i14238 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm15, %xmm2
    vpextrq $1, %xmm2, %rax
    movzbl (%rax), %eax
    vpinsrw $3, %eax, %xmm5, %xmm5
LBB2_483: ## %pl_loopend.3.i14241 ## in Loop: Header=BB2_349 Depth=1
    vmovups 192(%rsp), %ymm1 ## 32-byte Reload
    vmulps %ymm1, %ymm9, %ymm9
    vmulps %ymm1, %ymm13, %ymm13
    vinsertf128 $1, %xmm3, %ymm7, %ymm3
    testq %rbx, %rbx
    je LBB2_485
## BB#484: ## %pl_dolane.4.i14246 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm3, %rax
    movzbl (%rax), %eax
    vpinsrw $4, %eax, %xmm5, %xmm5
LBB2_485: ## %pl_loopend.4.i14249 ## in Loop: Header=BB2_349 Depth=1
    vdivps 1824(%rsp), %ymm9, %ymm1 ## 32-byte Folded Reload
    vmovups %ymm1, 96(%rsp) ## 32-byte Spill
    vdivps 1792(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload
    vmovups %ymm1, (%rsp) ## 32-byte Spill
    testq %rdx, %rdx
    vmovdqa %xmm0, 1344(%rsp) ## 16-byte Spill
    vmovdqu %ymm10, -96(%rsp) ## 32-byte Spill
    je LBB2_487
## BB#486: ## %pl_dolane.5.i14254 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm3, %rax
    movzbl (%rax), %eax
    vpinsrw $5, %eax, %xmm5, %xmm5
LBB2_487: ## %pl_loopend.5.i14257 ## in Loop: Header=BB2_349 Depth=1
    vmovups 480(%rsp), %ymm10 ## 32-byte Reload
    vmovups 96(%rsp), %ymm0 ## 32-byte Reload
    vmulps %ymm0, %ymm0, %ymm7
    vmovups (%rsp), %ymm0 ## 32-byte Reload
    vmulps %ymm0, %ymm0, %ymm2
    testq %rcx, %rcx
    vmovdqa %xmm6, 176(%rsp) ## 16-byte Spill
    je LBB2_489
## BB#488: ## %pl_dolane.6.i14262 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm3, %xmm6
    vmovq %xmm6, %rax
    movzbl (%rax), %eax
    vpinsrw $6, %eax, %xmm5, %xmm5
LBB2_489: ## %pl_loopend.6.i14264 ## in Loop: Header=BB2_349 Depth=1
    vaddps %ymm2, %ymm7, %ymm7
    vmovups 192(%rsp), %ymm0 ## 32-byte Reload
    vmulps %ymm0, %ymm0, %ymm2
    testb %r9b, %r9b
    js LBB2_491
## BB#490: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa %xmm5, %xmm0
    jmp LBB2_492
LBB2_491: ## %pl_dolane.7.i14269 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm3, %xmm3
    vpextrq $1, %xmm3, %rax
    movzbl (%rax), %eax
    vmovdqa %xmm5, %xmm0
    vpinsrw $7, %eax, %xmm0, %xmm0
LBB2_492: ## %__gather64_i8.exit14270 ## in Loop: Header=BB2_349 Depth=1
    vaddps %ymm7, %ymm2, %ymm9
    vmovdqa %xmm0, 1312(%rsp) ## 16-byte Spill
    vpand LCPI2_53(%rip), %xmm0, %xmm13
    vpmovzxwd %xmm13, %xmm15 ## xmm15 = xmm13[0],zero,xmm13[1],zero,xmm13[2],zero,xmm13[3],zero
    vmovq 56(%r15), %xmm2 ## xmm2 = mem[0],zero
    vpshufd $68, %xmm2, %xmm3 ## xmm3 = xmm2[0,1,0,1]
    vpaddq %xmm11, %xmm3, %xmm0
    vpaddq %xmm8, %xmm3, %xmm2
    vpaddq %xmm4, %xmm3, %xmm4
    vinsertf128 $1, %xmm2, %ymm4, %ymm5
    testq %r8, %r8
    jne LBB2_494
## BB#493: ## in Loop: Header=BB2_349 Depth=1
    vmovdqa 352(%rsp), %xmm7 ## 16-byte Reload
    jmp LBB2_495
    .p2align 4, 0x90
LBB2_494: ## %pl_dolane.i14168 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm5, %rax
    movzbl (%rax), %eax
    vmovdqa 352(%rsp), %xmm7 ## 16-byte Reload
    vpinsrw $0, %eax, %xmm7, %xmm7
LBB2_495: ## %pl_loopend.i14171 ## in Loop: Header=BB2_349 Depth=1
    vmovups 192(%rsp), %ymm8 ## 32-byte Reload
    vrsqrtps %ymm9, %ymm4
    testq %rsi, %rsi
    je LBB2_497
## BB#496: ## %pl_dolane.1.i14173 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm5, %rax
    movzbl (%rax), %eax
    vpinsrw $1, %eax, %xmm7, %xmm7
LBB2_497: ## %pl_loopend.1.i14176 ## in Loop: Header=BB2_349 Depth=1
    vmulps %ymm9, %ymm4, %ymm2
    testq %rdi, %rdi
    je LBB2_499
## BB#498: ## %pl_dolane.2.i14178 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm6
    vmovq %xmm6, %rax
    movzbl (%rax), %eax
    vpinsrw $2, %eax, %xmm7, %xmm7
LBB2_499: ## %pl_loopend.2.i14181 ## in Loop: Header=BB2_349 Depth=1
    vmulps %ymm2, %ymm4, %ymm9
    vpaddq %xmm12, %xmm3, %xmm2
    testq %rbp, %rbp
    je LBB2_501
## BB#500: ## %pl_dolane.3.i14183 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm5, %xmm3
    vpextrq $1, %xmm3, %rax
    movzbl (%rax), %eax
    vpinsrw $3, %eax, %xmm7, %xmm7
LBB2_501: ## %pl_loopend.3.i14186 ## in Loop: Header=BB2_349 Depth=1
    vmovups 608(%rsp), %ymm6 ## 32-byte Reload
    vmovaps LCPI2_7(%rip), %ymm1 ## ymm1 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00]
    vsubps %ymm9, %ymm1, %ymm3
    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    testq %rbx, %rbx
    je LBB2_503
## BB#502: ## %pl_dolane.4.i14188 ## in Loop: Header=BB2_349 Depth=1
    vmovq %xmm0, %rax
    movzbl (%rax), %eax
    vpinsrw $4, %eax, %xmm7, %xmm7
LBB2_503: ## %pl_loopend.4.i14191 ## in Loop: Header=BB2_349 Depth=1
    vmulps %ymm3, %ymm4, %ymm2
    testq %rdx, %rdx
    je LBB2_505
## BB#504: ## %pl_dolane.5.i14193 ## in Loop: Header=BB2_349 Depth=1
    vpextrq $1, %xmm0, %rax
    movzbl (%rax), %eax
    vpinsrw $5, %eax, %xmm7, %xmm7
LBB2_505: ## %pl_loopend.5.i14196 ## in Loop: Header=BB2_349 Depth=1
    vmulps LCPI2_5(%rip), %ymm2, %ymm3
    testq %rcx, %rcx
    je LBB2_507
## BB#506: ## %pl_dolane.6.i14198 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm0, %xmm2
    vmovq %xmm2, %rax
    movzbl (%rax), %eax
    vpinsrw $6, %eax, %xmm7, %xmm7
LBB2_507: ## %pl_loopend.6.i14200 ## in Loop: Header=BB2_349 Depth=1
    vmulps 96(%rsp), %ymm3, %ymm4 ## 32-byte Folded Reload
    vmulps (%rsp), %ymm3, %ymm5 ## 32-byte Folded Reload
    vmulps %ymm3, %ymm8, %ymm3
    testb %r9b, %r9b
    jns LBB2_509
## BB#508: ## %pl_dolane.7.i14202 ## in Loop: Header=BB2_349 Depth=1
    vextractf128 $1, %ymm0, %xmm0
    vpextrq $1, %xmm0, %rax
    movzbl (%rax), %eax
    vpinsrw $7, %eax, %xmm7, %xmm7
LBB2_509: ## %__gather64_i8.exit ## in Loop: Header=BB2_349 Depth=1
    vmovups 512(%rsp), %ymm0 ## 32-byte Reload
    vblendvps %ymm0, %ymm4, %ymm14, %ymm14
    vmovups %ymm14, 448(%rsp) ## 32-byte Spill
    vblendvps %ymm0, %ymm5, %ymm10, %ymm10
    vmovups %ymm10, 480(%rsp) ## 32-byte Spill
    vblendvps %ymm0, %ymm3, %ymm6, %ymm6
    vmovups %ymm6, 608(%rsp) ## 32-byte Spill
    vpand LCPI2_53(%rip), %xmm7, %xmm10
    vpmovzxwd %xmm10, %xmm0 ## xmm0 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero
    testl %r13d, %r13d
    vmovdqa %xmm7, 352(%rsp) ## 16-byte Spill
    movq %r8, 576(%rsp) ## 8-byte Spill
    movq %rsi, 640(%rsp) ## 8-byte Spill
    movq %rdi, 1280(%rsp) ## 8-byte Spill
    movq %rbp, 1248(%rsp) ## 8-byte Spill
    movq %rbx, 1216(%rsp) ## 8-byte Spill
    movq %rdx, 1184(%rsp) ## 8-byte Spill
    movq %rcx, 1152(%rsp) ## 8-byte Spill
    jle LBB2_510
## BB#558: ## %for_loop5259.lr.ph ## in Loop: Header=BB2_349 Depth=1
    vmovdqu %ymm0, 2656(%rsp) ## 32-byte Spill
    vmovdqa LCPI2_12(%rip), %xmm0 ## xmm0 = [947912704,947912704,947912704,947912704]
    vmovdqa %xmm0, %xmm5
    vmovdqa 384(%rsp), %xmm0 ## 16-byte Reload
    vpaddd %xmm5, %xmm0, %xmm2
    vmovups 1088(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm2, %ymm1, %ymm2
    vmovdqa LCPI2_10(%rip), %xmm4 ## xmm4 = [939524096,939524096,939524096,939524096]
    vmovdqa %xmm4, %xmm8
    vpaddd %xmm8, %xmm0, %xmm3
    vmovups 288(%rsp), %ymm0 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm0, %ymm3
    vmovups 224(%rsp), %ymm0 ## 32-byte Reload
    vextractf128 $1, %ymm0, %xmm4
    vxorps %xmm0, %xmm0, %xmm0
    vpcmpeqd %xmm0, %xmm4, %xmm6
    vmovups 1120(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm6, %ymm1, %ymm6
    vmovaps LCPI2_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05]
    vaddps %ymm14, %ymm2, %ymm2
    vblendvps %ymm6, %ymm2, %ymm3, %ymm2
    vmovdqa 736(%rsp), %xmm1 ## 16-byte Reload
    vpaddd %xmm5, %xmm1, %xmm3
    vmovups 928(%rsp), %ymm6 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm6, %ymm3
    vpaddd %xmm8, %xmm1, %xmm6
    vmovups 992(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm6, %ymm1, %ymm6
    vmovups 1024(%rsp), %ymm1 ## 32-byte Reload
    vextractf128 $1, %ymm1, %xmm1
    vpcmpeqd %xmm0, %xmm1, %xmm7
    vmovups 1376(%rsp), %ymm11 ## 32-byte Reload
    vinsertf128 $1, %xmm7, %ymm11, %ymm7
    vaddps %ymm14, %ymm3, %ymm3
    vblendvps %ymm7, %ymm3, %ymm6, %ymm6
    vpcmpeqd LCPI2_15(%rip), %xmm4, %xmm3
    vmovups 64(%rsp), %ymm4 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm4, %ymm3
    vmovdqu %ymm15, 64(%rsp) ## 32-byte Spill
    vmovdqa %xmm5, %xmm15
    vmovaps LCPI2_11(%rip), %ymm11 ## ymm11 = [1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192]
    vorps 256(%rsp), %ymm11, %ymm5 ## 32-byte Folded Reload
    vblendvps %ymm3, %ymm5, %ymm2, %ymm12
    vpcmpeqd LCPI2_15(%rip), %xmm1, %xmm1
    vmovups 960(%rsp), %ymm2 ## 32-byte Reload
    vinsertf128 $1, %xmm1, %ymm2, %ymm1
    vorps 704(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload
    vblendvps %ymm1, %ymm2, %ymm6, %ymm9
    vmovdqa 2464(%rsp), %xmm2 ## 16-byte Reload
    vpaddd %xmm15, %xmm2, %xmm1
    vmovups 2400(%rsp), %ymm3 ## 32-byte Reload
    vinsertf128 $1, %xmm1, %ymm3, %ymm1
    vpaddd %xmm8, %xmm2, %xmm2
    vmovups 2528(%rsp), %ymm3 ## 32-byte Reload
    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    vmovups 2560(%rsp), %ymm3 ## 32-byte Reload
    vextractf128 $1, %ymm3, %xmm6
    vpcmpeqd %xmm0, %xmm6, %xmm7
    vmovups 2432(%rsp), %ymm3 ## 32-byte Reload
    vinsertf128 $1, %xmm7, %ymm3, %ymm7
    vaddps %ymm14, %ymm1, %ymm1
    vblendvps %ymm7, %ymm1, %ymm2, %ymm1
    vmovdqa 2208(%rsp), %xmm3 ## 16-byte Reload
    vpaddd %xmm15, %xmm3, %xmm2
    vmovups 2144(%rsp), %ymm4 ## 32-byte Reload
    vinsertf128 $1, %xmm2, %ymm4, %ymm2
    vpaddd %xmm8, %xmm3, %xmm7
    vmovups 2176(%rsp), %ymm3 ## 32-byte Reload
    vinsertf128 $1, %xmm7, %ymm3, %ymm7
    vmovups 2240(%rsp), %ymm3 ## 32-byte Reload
    vextractf128 $1, %ymm3, %xmm4
    vpcmpeqd %xmm0, %xmm4, %xmm3
    vmovups 2048(%rsp), %ymm5 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm5, %ymm3
    vaddps %ymm14, %ymm2, %ymm2
    vmovdqa LCPI2_15(%rip), %xmm5 ## xmm5 = [260046848,260046848,260046848,260046848]
    vmovdqa %xmm5, %xmm14
    vblendvps %ymm3, %ymm2, %ymm7, %ymm2
    vpcmpeqd %xmm14, %xmm6, %xmm3
    vmovups 2496(%rsp), %ymm5 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm5, %ymm3
    vorps 2592(%rsp), %ymm11, %ymm6 ## 32-byte Folded Reload
    vblendvps %ymm3, %ymm6, %ymm1, %ymm8
    vmovups 128(%rsp), %ymm1 ## 32-byte Reload
    vextractf128 $1, %ymm1, %xmm3
    vpslld $16, %xmm3, %xmm3
    vmovups 320(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm3, %ymm1, %ymm3
    vmovups 1056(%rsp), %ymm1 ## 32-byte Reload
    vextractf128 $1, %ymm1, %xmm6
    vpslld $16, %xmm6, %xmm6
    vmovups 768(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm6, %ymm1, %ymm6
    vorps %ymm3, %ymm12, %ymm3
    vorps %ymm6, %ymm9, %ymm5
    vpcmpeqd %xmm14, %xmm4, %xmm4
    vmovups 2272(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm4, %ymm1, %ymm4
    vorps 2368(%rsp), %ymm11, %ymm6 ## 32-byte Folded Reload
    vblendvps %ymm4, %ymm6, %ymm2, %ymm2
    vmulps %ymm3, %ymm3, %ymm4
    vmulps %ymm5, %ymm5, %ymm6
    vsubps %ymm4, %ymm3, %ymm4
    vsubps %ymm6, %ymm5, %ymm6
    vaddps %ymm6, %ymm4, %ymm4
    vmovdqa -64(%rsp), %xmm1 ## 16-byte Reload
    vpunpckhwd %xmm0, %xmm1, %xmm6 ## xmm6 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
    vmovups -96(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm6, %ymm1, %ymm6
    vpunpckhwd %xmm0, %xmm13, %xmm7 ## xmm7 = xmm13[4],xmm0[4],xmm13[5],xmm0[5],xmm13[6],xmm0[6],xmm13[7],xmm0[7]
    vxorps %xmm12, %xmm12, %xmm12
    vmovups 64(%rsp), %ymm0 ## 32-byte Reload
    vinsertf128 $1, %xmm7, %ymm0, %ymm7
    vmovups 896(%rsp), %ymm0 ## 32-byte Reload
    vextractf128 $1, %ymm0, %xmm0
    vpslld $16, %xmm0, %xmm0
    vmovups 1440(%rsp), %ymm1 ## 32-byte Reload
    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    vmovups 2336(%rsp), %ymm1 ## 32-byte Reload
    vextractf128 $1, %ymm1, %xmm1
    vpslld $16, %xmm1, %xmm1
    vmovups 2304(%rsp), %ymm11 ## 32-byte Reload
    vinsertf128 $1, %xmm1, %ymm11, %ymm1
    vmovaps LCPI2_16(%rip), %ymm11 ## ymm11 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00]
    vmovaps %ymm11, %ymm13
    vmulps %ymm13, %ymm4, %ymm11
    vaddps LCPI2_6(%rip), %ymm11, %ymm11
    vmulps %ymm13, %ymm3, %ymm3
    vmulps %ymm13, %ymm5, %ymm5
    vsqrtps %ymm11, %ymm11
    vmovaps LCPI2_17(%rip), %ymm13 ## ymm13 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00]
    vaddps %ymm13, %ymm3, %ymm3
    vaddps %ymm13, %ymm5, %ymm5
    vmulps %ymm11, %ymm3, %ymm3
    vmovups %ymm3, 128(%rsp) ## 32-byte Spill
    vmulps %ymm5, %ymm11, %ymm14
    vmovups 192(%rsp), %ymm11 ## 32-byte Reload
    vmulps LCPI2_18(%rip), %ymm4, %ymm3
    vcvtdq2ps %ymm6, %ymm4
    vcvtdq2ps %ymm7, %ymm5
    vmovaps LCPI2_7(%rip), %ymm6 ## ymm6 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00]
    vsubps %ymm3, %ymm6, %ymm3
    vmovups %ymm3, 320(%rsp) ## 32-byte Spill
    vorps %ymm0, %ymm8, %ymm0
    vorps %ymm1, %ymm2, %ymm13
    vmovaps LCPI2_19(%rip), %ymm1 ## ymm1 = [3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03]
    vmovaps %ymm1, %ymm2
    vmulps %ymm2, %ymm4, %ymm1
    vmovups %ymm1, 256(%rsp) ## 32-byte Spill
    vmulps %ymm2, %ymm5, %ymm1
    vmovups %ymm1, 64(%rsp) ## 32-byte Spill
    vpunpckhwd %xmm12, %xmm10, %xmm1 ## xmm1 = xmm10[4],xmm12[4],xmm10[5],xmm12[5],xmm10[6],xmm12[6],xmm10[7],xmm12[7]
    vmovups 2656(%rsp), %ymm3 ## 32-byte Reload
    vinsertf128 $1, %xmm1, %ymm3, %ymm1
    vcvtdq2ps %ymm1, %ymm1
    vmulps %ymm2, %ymm1, %ymm1
    vmovups %ymm1, 224(%rsp) ## 32-byte Spill
    movq 64(%r15), %r14
    movq 72(%r15), %r8
    movq 80(%r15), %rcx
    movq 120(%r15), %rdi
    vaddps LCPI2_20(%rip), %ymm13, %ymm1
    vmulps LCPI2_21(%rip), %ymm1, %ymm1
    vmulps %ymm1, %ymm0, %ymm0
    vmovups %ymm0, 288(%rsp) ## 32-byte Spill
    vxorps %ymm0, %ymm0, %ymm0
    vmovups %ymm0, -64(%rsp) ## 32-byte Spill
    movq 2760(%rsp), %r12
    vxorps %ymm0, %ymm0, %ymm0
    vmovups %ymm0, -96(%rsp) ## 32-byte Spill
    vxorps %ymm15, %ymm15, %ymm15
    movl 832(%rsp), %ebp ## 4-byte Reload
    vmovups 96(%rsp), %ymm10 ## 32-byte Reload
    .p2align 4, 0x90
LBB2_559: ## %for_loop5259 ## Parent Loop BB2_349 Depth=1 ## => This Inner Loop Header: Depth=2
    movslq (%r12), %rsi
    vmovss (%rdi,%rsi,4), %xmm1 ## xmm1 = mem[0],zero,zero,zero
    vbroadcastss (%r14,%rsi,4), %ymm0
    vsubps %ymm10, %ymm0, %ymm3
    vbroadcastss (%r8,%rsi,4), %ymm0
    vsubps (%rsp), %ymm0, %ymm2 ## 32-byte Folded Reload
    vbroadcastss (%rcx,%rsi,4), %ymm0
    vsubps %ymm11, %ymm0, %ymm0
    vmulps %ymm3, %ymm3, %ymm4
    vmulps %ymm2, %ymm2, %ymm5
    vaddps %ymm5, %ymm4, %ymm4
    vmulps %ymm0, %ymm0, %ymm5
    vaddps %ymm5, %ymm4, %ymm4
    vmulss %xmm1, %xmm1, %xmm5
    vpermilps $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0]
    vinsertf128 $1, %xmm5, %ymm5, %ymm5
    cmpl $255, %r9d
    jne LBB2_566
## BB#560: ## %cif_mask_all5329 ## in Loop: Header=BB2_559 Depth=2
    vcmpnleps %ymm4, %ymm5, %ymm6
    vmovmskps %ymm6, %eax
    testl %eax, %eax
    je LBB2_567
## BB#561: ## %cif_mask_all5329 ## in Loop: Header=BB2_559 Depth=2
    cmpl $255, %eax
    jne LBB2_571
## BB#562: ## %cif_mask_all5371 ## in Loop: Header=BB2_559 Depth=2
    vsqrtps %ymm4, %ymm6
    vrcpps %ymm6, %ymm4
    vmulps %ymm4, %ymm6, %ymm5
    vmovaps LCPI2_20(%rip), %ymm7 ## ymm7 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00]
    vsubps %ymm5, %ymm7, %ymm5
    vmulps %ymm5, %ymm4, %ymm7
    vmulps %ymm7, %ymm3, %ymm5
    vmulps %ymm7, %ymm2, %ymm4
    vmulps %ymm7, %ymm0, %ymm3
    vmovups 128(%rsp), %ymm9 ## 32-byte Reload
    vmulps %ymm5, %ymm9, %ymm0
    vmulps %ymm4, %ymm14, %ymm2
    vaddps %ymm2, %ymm0, %ymm0
    vmovups 320(%rsp), %ymm12 ## 32-byte Reload
    vmulps %ymm3, %ymm12, %ymm2
    vaddps %ymm0, %ymm2, %ymm0
    vcmpnleps LCPI2_55(%rip), %ymm0, %ymm2
    vmovmskps %ymm2, %eax
    testl %eax, %eax
    je LBB2_567
## BB#563: ## %cif_mask_all5371 ## in Loop: Header=BB2_559 Depth=2
    cmpl $255, %eax
    jne LBB2_568
## BB#564: ## %cif_test_all5380 ## in Loop: Header=BB2_559 Depth=2
    movq 88(%r15), %rax
    vsubss (%rax,%rsi,4), %xmm1, %xmm2
    vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0]
    vinsertf128 $1, %xmm2, %ymm2, %ymm2
    vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0]
    vinsertf128 $1, %xmm1, %ymm1, %ymm1
    vsubps %ymm6, %ymm1, %ymm1
    vdivps %ymm2, %ymm1, %ymm10
    vsubps 448(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload
    vsubps 480(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload
    vsubps 608(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload
%ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm5, %ymm6, %ymm5 jmp LBB2_565 .p2align 4, 0x90 LBB2_566: ## %cif_mask_mixed5330 ## in Loop: Header=BB2_559 Depth=2 vcmpnleps %ymm4, %ymm5, %ymm5 vandps 512(%rsp), %ymm5, %ymm7 ## 32-byte Folded Reload vmovmskps %ymm7, %eax testl %eax, %eax je LBB2_567 ## BB#574: ## %safe_if_run_true6201 ## in Loop: Header=BB2_559 Depth=2 vsqrtps %ymm4, %ymm6 vrcpps %ymm6, %ymm4 vmulps %ymm4, %ymm6, %ymm5 vmovaps LCPI2_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm5, %ymm8, %ymm5 vmulps %ymm5, %ymm4, %ymm8 vmulps %ymm8, %ymm3, %ymm4 vblendvps %ymm7, %ymm4, %ymm3, %ymm5 vmulps %ymm8, %ymm2, %ymm3 vblendvps %ymm7, %ymm3, %ymm2, %ymm4 vmulps %ymm8, %ymm0, %ymm2 vblendvps %ymm7, %ymm2, %ymm0, %ymm3 vmovups 128(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm5, %ymm9, %ymm0 vmulps %ymm4, %ymm14, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 320(%rsp), %ymm12 ## 32-byte Reload vmulps %ymm3, %ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 cmpl $255, %eax jne LBB2_578 ## BB#575: ## %cif_mask_all6241 ## in Loop: Header=BB2_559 Depth=2 vcmpnleps LCPI2_55(%rip), %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB2_567 ## BB#576: ## %cif_mask_all6241 ## in Loop: Header=BB2_559 Depth=2 cmpl $255, %eax jne LBB2_581 ## BB#577: ## %cif_test_all6250 ## in Loop: Header=BB2_559 Depth=2 movq 88(%r15), %rax vsubss (%rax,%rsi,4), %xmm1, %xmm2 vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm2, %ymm1, %ymm10 vsubps 448(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 608(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm6, %ymm5, %ymm5 LBB2_565: ## %cif_done5331 ## in Loop: Header=BB2_559 Depth=2 vrsqrtps %ymm5, %ymm6 vmulps %ymm6, %ymm5, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_7(%rip), %ymm7 ## ymm7 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm5, %ymm7, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_5(%rip), %ymm6 ## ymm6 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm6, %ymm7 vmulps %ymm7, %ymm5, %ymm5 vmulps %ymm5, %ymm2, %ymm2 vmulps %ymm5, %ymm4, %ymm4 vmulps %ymm5, %ymm3, %ymm3 vmulps %ymm2, %ymm9, %ymm2 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm2, %ymm2 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vxorps %ymm8, %ymm8, %ymm8 vmaxps %ymm8, %ymm2, %ymm4 vcmpnleps %ymm4, %ymm8, %ymm2 vcmpnltps %ymm4, %ymm8, %ymm3 vmovaps LCPI2_22(%rip), %ymm9 ## ymm9 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm3, %ymm9, %ymm4, %ymm4 vpsrad $23, %xmm4, %xmm5 vextractf128 $1, %ymm4, %xmm6 vpsrad $23, %xmm6, %xmm6 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm6, %xmm6 vpaddd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vandps LCPI2_24(%rip), %ymm4, %ymm4 vorps %ymm7, %ymm4, %ymm4 vsubps %ymm4, %ymm9, %ymm4 vmulps LCPI2_25(%rip), %ymm4, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 
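## The vsqrtps/vrcpps and vrsqrtps runs above follow the standard pattern of
## one Newton-Raphson step applied to the hardware's ~12-bit estimate: the
## reciprocal is refined as r*(2 - x*r) (the LCPI2_20 = 2.0 constant) and the
## reciprocal square root as 0.5*e*(3 - x*e*e) (LCPI2_7 = 3.0, LCPI2_5 = 0.5).
## A minimal C sketch of the same math, assuming AVX intrinsics rather than
## this exact instruction schedule (rcp_nr/rsqrt_nr are illustrative names,
## not symbols from this module):
##
##     #include <immintrin.h>
##
##     /* ~1/x: one Newton-Raphson step on the vrcpps estimate */
##     static inline __m256 rcp_nr(__m256 x) {
##         __m256 r = _mm256_rcp_ps(x);
##         return _mm256_mul_ps(r, _mm256_sub_ps(_mm256_set1_ps(2.0f),
##                                               _mm256_mul_ps(x, r)));
##     }
##
##     /* ~1/sqrt(x): one Newton-Raphson step on the vrsqrtps estimate */
##     static inline __m256 rsqrt_nr(__m256 x) {
##         __m256 e   = _mm256_rsqrt_ps(x);
##         __m256 xee = _mm256_mul_ps(x, _mm256_mul_ps(e, e));
##         return _mm256_mul_ps(_mm256_mul_ps(_mm256_set1_ps(0.5f), e),
##                              _mm256_sub_ps(_mm256_set1_ps(3.0f), xee));
##     }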
vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps %ymm9, %ymm6, %ymm6 vsubps %ymm4, %ymm8, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vcvtdq2ps %ymm5, %ymm5 vmulps LCPI2_34(%rip), %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm2, LCPI2_36(%rip), %ymm1, %ymm2 vblendvps %ymm3, %ymm2, %ymm4, %ymm2 vmulps %ymm2, %ymm13, %ymm2 vmulps LCPI2_37(%rip), %ymm2, %ymm3 vroundps $9, %ymm3, %ymm3 vcvttps2dq %ymm3, %ymm4 vmulps LCPI2_38(%rip), %ymm3, %ymm5 vsubps %ymm5, %ymm2, %ymm2 vmulps LCPI2_39(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm2, %ymm2 vmulps LCPI2_40(%rip), %ymm2, %ymm3 vaddps LCPI2_41(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_42(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_43(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_44(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_45(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm9, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm2 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm4, %xmm3 vextractf128 $1, %ymm4, %xmm5 vpaddd %xmm1, %xmm5, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vpcmpgtd %xmm1, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm6, %xmm1, %xmm5 vpcmpgtd %xmm3, %xmm1, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vpslld $23, %xmm3, %xmm3 vpslld $23, %xmm6, %xmm6 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vaddps %ymm9, %ymm2, %ymm2 vmulps %ymm2, %ymm3, %ymm2 vblendvps %ymm4, LCPI2_48(%rip), %ymm2, %ymm2 vblendvps %ymm5, %ymm8, %ymm2, %ymm2 vminps %ymm9, %ymm10, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 288(%rsp), %ymm2, %ymm1 ## 32-byte Folded Reload vaddps %ymm9, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%rsi,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%rsi,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm2 vmovups %ymm2, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%rsi,4), %ymm1 vmulps 224(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm1 jmp LBB2_570 LBB2_571: ## %cif_test_mixed5762 ## in Loop: Header=BB2_559 Depth=2 vmovups %ymm15, 384(%rsp) ## 32-byte Spill vmovaps %ymm13, %ymm15 vsqrtps %ymm4, %ymm4 vrcpps %ymm4, %ymm5 vmulps %ymm5, %ymm4, %ymm7 vmovaps LCPI2_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm7, %ymm8, %ymm7 vmulps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm3, %ymm5 vblendvps %ymm6, %ymm5, %ymm3, %ymm3 vmulps %ymm7, %ymm2, %ymm5 vblendvps %ymm6, %ymm5, %ymm2, %ymm5 vmulps %ymm7, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm7 vmovaps %ymm14, %ymm12 vmovups 128(%rsp), %ymm14 ## 32-byte Reload vmulps %ymm3, %ymm14, %ymm0 vmulps %ymm5, 
%ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 320(%rsp), %ymm13 ## 32-byte Reload vmulps %ymm7, %ymm13, %ymm2 vaddps %ymm2, %ymm0, %ymm2 vxorps %ymm9, %ymm9, %ymm9 vcmpnleps %ymm9, %ymm2, %ymm0 vblendvps %ymm6, %ymm0, %ymm9, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB2_572 ## BB#573: ## %safe_if_run_true6063 ## in Loop: Header=BB2_559 Depth=2 movq 88(%r15), %rax vsubss (%rax,%rsi,4), %xmm1, %xmm6 vpermilps $0, %xmm6, %xmm6 ## xmm6 = xmm6[0,0,0,0] vinsertf128 $1, %xmm6, %ymm6, %ymm6 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm4, %ymm1, %ymm1 vdivps %ymm6, %ymm1, %ymm11 vsubps 448(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm5, %ymm4 ## 32-byte Folded Reload vsubps 608(%rsp), %ymm7, %ymm5 ## 32-byte Folded Reload vmulps %ymm3, %ymm3, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm5, %ymm5, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm3, %ymm7 vblendvps %ymm0, %ymm7, %ymm3, %ymm3 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm0, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm5, %ymm6 vblendvps %ymm0, %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm14, %ymm3 vmulps %ymm4, %ymm12, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm5, %ymm13, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 672(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm0, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 800(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm0, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 800(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 672(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmovaps %ymm15, %ymm13 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI2_37(%rip), 
%ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 192(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 96(%r15), %rax vmulps 288(%rsp), %ymm3, %ymm2 ## 32-byte Folded Reload vaddps %ymm10, %ymm2, %ymm2 vmovups 96(%rsp), %ymm10 ## 32-byte Reload vmulps %ymm2, %ymm1, %ymm1 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%rsi,4), %ymm2 vmulps 256(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups 384(%rsp), %ymm15 ## 32-byte Reload vaddps %ymm2, %ymm15, %ymm2 vblendvps %ymm0, %ymm2, %ymm15, %ymm15 vbroadcastss (%rdx,%rsi,4), %ymm2 vmulps 64(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm2, %ymm3, %ymm2 vblendvps %ymm0, %ymm2, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%rsi,4), %ymm2 vmulps 224(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups -64(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 vmovups %ymm2, -64(%rsp) ## 32-byte Spill vmovaps %ymm12, %ymm14 jmp LBB2_567 LBB2_578: ## %cif_mask_mixed6242 ## in Loop: Header=BB2_559 Depth=2 vxorps %ymm9, %ymm9, %ymm9 vcmpnleps %ymm9, %ymm0, %ymm2 vblendvps %ymm2, %ymm7, %ymm9, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB2_567 ## BB#579: ## %safe_if_run_true6499 ## in Loop: Header=BB2_559 Depth=2 movq 88(%r15), %rax vsubss (%rax,%rsi,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 448(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 608(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = 
[5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps 128(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1760(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm1, %ymm1 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1728(%rsp), %ymm6 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm6, %ymm6 vmovups %ymm6, 1728(%rsp) ## 32-byte Spill vsubps %ymm6, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm1, 1760(%rsp) ## 32-byte Spill vcvtdq2ps %ymm1, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI2_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 192(%rsp), 
%ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 288(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmovups 96(%rsp), %ymm10 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%rsi,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%rsi,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%rsi,4), %ymm1 vmulps 224(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 vmovups %ymm1, -64(%rsp) ## 32-byte Spill jmp LBB2_567 LBB2_572: ## in Loop: Header=BB2_559 Depth=2 vmovaps %ymm12, %ymm14 vmovaps %ymm15, %ymm13 vmovups 384(%rsp), %ymm15 ## 32-byte Reload jmp LBB2_567 LBB2_568: ## %cif_test_mixed5497 ## in Loop: Header=BB2_559 Depth=2 movq 88(%r15), %rax vsubss (%rax,%rsi,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 448(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 608(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 864(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1696(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1696(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps 
LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 864(%rsp) ## 32-byte Spill jmp LBB2_569 LBB2_581: ## %cif_test_mixed6370 ## in Loop: Header=BB2_559 Depth=2 movq 88(%r15), %rax vsubss (%rax,%rsi,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 448(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 608(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI2_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI2_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1632(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI2_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1664(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1664(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI2_25(%rip), %ymm5, %ymm6 vaddps LCPI2_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI2_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1632(%rsp) ## 32-byte Spill LBB2_569: ## 
%cif_done5331 ## in Loop: Header=BB2_559 Depth=2 vcvtdq2ps %ymm7, %ymm6 vmulps LCPI2_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI2_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI2_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI2_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI2_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI2_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI2_40(%rip), %ymm3, %ymm4 vaddps LCPI2_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI2_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI2_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI2_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 192(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r15), %rax vmulps 288(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r15), %rdx movq 112(%r15), %rbx vbroadcastss (%rax,%rsi,4), %ymm1 vmulps 256(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%rsi,4), %ymm1 vmulps 64(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -96(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -96(%rsp) ## 32-byte Spill vbroadcastss (%rbx,%rsi,4), %ymm1 vmulps 224(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -64(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 LBB2_570: ## %cif_done5331 ## in Loop: Header=BB2_559 Depth=2 vmovups %ymm1, -64(%rsp) ## 32-byte Spill vmovups 96(%rsp), %ymm10 ## 32-byte Reload LBB2_567: ## %cif_done5331 ## in Loop: Header=BB2_559 Depth=2 addq $4, %r12 decl %r13d jne LBB2_559 jmp LBB2_511 LBB2_510: ## in Loop: Header=BB2_349 Depth=1 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -96(%rsp) ## 32-byte Spill vxorps %ymm15, %ymm15, %ymm15 movl 832(%rsp), %ebp ## 4-byte Reload LBB2_511: ## %for_exit5260 ## in Loop: Header=BB2_349 Depth=1 addl %ebp, %r11d vxorps %ymm0, %ymm0, %ymm0 vmaxps %ymm0, %ymm15, %ymm0 vxorps %ymm9, %ymm9, %ymm9 vmovaps LCPI2_22(%rip), %ymm11 ## ymm11 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vminps %ymm11, %ymm0, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm0 vcmpnltps %ymm2, %ymm9, %ymm1 vblendvps %ymm1, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI2_23(%rip), %xmm12 
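## The LCPI2_23..LCPI2_34 block that follows appears to be a polynomial
## log(): 4294967170 (0xFFFFFF82) is the exponent bias -126, 2155872255
## (0x807FFFFF) masks off the exponent so that OR-ing in 0.5 leaves the
## mantissa m in [0.5, 1), and LCPI2_25..LCPI2_33 are Horner coefficients in
## t = 1 - m; e*ln2 (LCPI2_34) is added back at the end. The result is scaled
## by LCPI2_49 = 0.4545454 (~1/2.2) and re-expanded with the
## LCPI2_37..LCPI2_48 exp() block, i.e. a pow(v, 1/2.2) gamma correction
## before the per-lane vpextrb byte stores further down. A scalar C sketch of
## the log() part, reconstructed from the constants (not the exact code; the
## -inf/nan blends for non-positive inputs are omitted):
##
##     #include <stdint.h>
##     #include <string.h>
##
##     static float log_sketch(float x) {         /* assumes x > 0 */
##         uint32_t b; memcpy(&b, &x, 4);
##         int e = (int)(b >> 23) - 126;           /* vpsrad $23, +LCPI2_23 */
##         b = (b & 0x807FFFFFu) | 0x3F000000u;    /* vandps mask, vorps 0.5 */
##         float m; memcpy(&m, &b, 4);
##         float t = 1.0f - m, p = 1.749101f;      /* LCPI2_25 */
##         p = p*t - 2.489927f;  p = p*t + 1.984423f;
##         p = p*t - 0.5996323f; p = p*t + 0.3424419f;
##         p = p*t + 0.1754176f; p = p*t + 0.2519190f;
##         p = p*t + 0.3332604f; p = p*t + 0.5000010f;   /* ..LCPI2_33 */
##         return (float)e * 0.6931472f - t * (t*p + 1.0f);
##     }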
## xmm12 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm12, %xmm4, %xmm4 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovups 1472(%rsp), %ymm5 ## 32-byte Reload vmovups 512(%rsp), %ymm15 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm5, %ymm5 vmovaps LCPI2_24(%rip), %ymm3 ## ymm3 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm3, %ymm2, %ymm2 vmovaps LCPI2_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm7, %ymm2, %ymm2 vmovups 1504(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vmovups %ymm3, 1504(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmovaps LCPI2_25(%rip), %ymm3 ## ymm3 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00] vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_26(%rip), %ymm4 ## ymm4 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_27(%rip), %ymm4 ## ymm4 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_28(%rip), %ymm4 ## ymm4 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_29(%rip), %ymm4 ## ymm4 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_30(%rip), %ymm4 ## ymm4 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_31(%rip), %ymm4 ## ymm4 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_32(%rip), %ymm4 ## ymm4 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI2_33(%rip), %ymm4 ## ymm4 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm9, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm5, 1472(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm3 vmovaps LCPI2_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm4, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI2_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vmovaps LCPI2_36(%rip), %ymm7 ## ymm7 = [nan,nan,nan,nan,nan,nan,nan,nan] vblendvps %ymm0, %ymm7, %ymm3, %ymm0 vblendvps %ymm1, %ymm0, %ymm2, %ymm0 vmovaps LCPI2_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01] vmulps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_37(%rip), %ymm1 ## ymm1 = [1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00] vmulps %ymm1, %ymm0, %ymm1 vroundps $9, %ymm1, %ymm1 vcvttps2dq %ymm1, %ymm2 vmovaps LCPI2_38(%rip), %ymm3 ## 
ymm3 = [6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01] vmulps %ymm3, %ymm1, %ymm3 vsubps %ymm3, %ymm0, %ymm0 vmovaps LCPI2_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06] vmulps %ymm3, %ymm1, %ymm1 vsubps %ymm1, %ymm0, %ymm0 vmovaps LCPI2_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04] vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_41(%rip), %ymm3 ## ymm3 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03] vaddps %ymm3, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_42(%rip), %ymm6 ## ymm6 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03] vaddps %ymm6, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_43(%rip), %ymm8 ## ymm8 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02] vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_44(%rip), %ymm10 ## ymm10 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01] vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI2_45(%rip), %ymm13 ## ymm13 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01] vaddps %ymm13, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm11, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vaddps %ymm11, %ymm0, %ymm0 vmovdqa LCPI2_46(%rip), %xmm13 ## xmm13 = [127,127,127,127] vpaddd %xmm13, %xmm2, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpaddd %xmm13, %xmm3, %xmm4 vpcmpgtd %xmm13, %xmm3, %xmm3 vpcmpgtd %xmm13, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vmovdqa LCPI2_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm4, %xmm6, %xmm3 vpcmpgtd %xmm1, %xmm6, %xmm5 vinsertf128 $1, %xmm3, %ymm5, %ymm3 vpslld $23, %xmm1, %xmm1 vpslld $23, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm1, %ymm1 vmulps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm0, %ymm0 vblendvps %ymm3, %ymm9, %ymm0, %ymm0 vmovups %ymm0, (%rsp) ## 32-byte Spill vmovups -96(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm9, %ymm1 vcmpnltps %ymm3, %ymm9, %ymm2 vblendvps %ymm2, %ymm11, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm4 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vpaddd %xmm12, %xmm5, %xmm5 vpaddd %xmm12, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovups 1536(%rsp), %ymm5 ## 32-byte Reload vblendvps %ymm15, %ymm4, %ymm5, %ymm5 vmovaps LCPI2_24(%rip), %ymm7 ## ymm7 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm7, %ymm3, %ymm3 vmovaps LCPI2_5(%rip), %ymm0 ## ymm0 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm0, %ymm3, %ymm3 vmovups 1568(%rsp), %ymm4 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm4, %ymm4 vmovups %ymm4, 1568(%rsp) ## 32-byte Spill vsubps %ymm4, %ymm11, %ymm3 vmulps LCPI2_25(%rip), %ymm3, %ymm4 vaddps LCPI2_26(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_27(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_28(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_29(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_30(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_31(%rip), 
%ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_32(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI2_33(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm11, %ymm4, %ymm4 vsubps %ymm3, %ymm9, %ymm3 vmulps %ymm4, %ymm3, %ymm3 vmovups %ymm5, 1536(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm4 vmovaps LCPI2_34(%rip), %ymm8 ## ymm8 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm8, %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI2_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm4, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps LCPI2_49(%rip), %ymm1, %ymm1 vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm14 vmulps LCPI2_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm5 vmulps LCPI2_40(%rip), %ymm5, %ymm1 vaddps LCPI2_41(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_42(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_43(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI2_45(%rip), %ymm1, %ymm10 vpaddd %xmm13, %xmm14, %xmm1 vpcmpgtd %xmm13, %xmm14, %xmm2 vmovdqu %ymm2, 128(%rsp) ## 32-byte Spill vpcmpgtd %xmm1, %xmm6, %xmm2 vmovdqu %ymm2, -96(%rsp) ## 32-byte Spill vpslld $23, %xmm1, %xmm1 vmovdqu %ymm1, 320(%rsp) ## 32-byte Spill vmovups -64(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm1 vcmpnltps %ymm2, %ymm9, %ymm4 vblendvps %ymm4, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm6 vpsrad $23, %xmm6, %xmm6 vpaddd %xmm12, %xmm6, %xmm6 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vmovups 2080(%rsp), %ymm6 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm6, %ymm6 vandps %ymm7, %ymm2, %ymm2 vorps %ymm0, %ymm2, %ymm2 vmovups 1600(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm3, 1600(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmulps LCPI2_25(%rip), %ymm2, %ymm3 vaddps LCPI2_26(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_27(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_28(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_29(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_30(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_31(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_32(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI2_33(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm15, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm6, 2080(%rsp) ## 32-byte Spill vcvtdq2ps %ymm6, %ymm3 vmulps %ymm8, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI2_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI2_36(%rip), %ymm0, %ymm1 vblendvps %ymm4, %ymm1, %ymm2, %ymm1 vmulps LCPI2_49(%rip), %ymm1, %ymm1 vmulps LCPI2_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm7 vmulps LCPI2_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI2_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm4 vmulps LCPI2_40(%rip), %ymm4, %ymm1 vaddps LCPI2_41(%rip), %ymm1, %ymm2 vpaddd %xmm13, %xmm7, %xmm3 vpcmpgtd %xmm13, %xmm7, %xmm11 vmovups (%rsp), %ymm0 ## 32-byte Reload vmulps LCPI2_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, 
%ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI2_51(%rip), %xmm6 ## xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm6, %xmm1, %xmm1 vpshufb %xmm6, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm1 ## xmm1 = xmm0[0],xmm1[0] vmovdqa LCPI2_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm3, %xmm0, %xmm12 vpslld $23, %xmm3, %xmm13 movslq %r11d, %rax movq 576(%rsp), %rbx ## 8-byte Reload testq %rbx, %rbx movq 2784(%rsp), %rsi je LBB2_513 ## BB#512: ## %pl_dolane.i14053 ## in Loop: Header=BB2_349 Depth=1 vpextrb $0, %xmm1, (%rsi,%rax) LBB2_513: ## %pl_loopend.i14056 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm2, %ymm4, %ymm3 vmulps %ymm10, %ymm5, %ymm0 vextractf128 $1, %ymm14, %xmm2 movq 640(%rsp), %rbp ## 8-byte Reload testq %rbp, %rbp movl -100(%rsp), %edx ## 4-byte Reload movl -20(%rsp), %edi ## 4-byte Reload movq 2800(%rsp), %rcx movq %rcx, %r8 movq 1280(%rsp), %rcx ## 8-byte Reload movq 1248(%rsp), %r11 ## 8-byte Reload movq 1216(%rsp), %r14 ## 8-byte Reload movq 1184(%rsp), %r12 ## 8-byte Reload movq 1152(%rsp), %r13 ## 8-byte Reload je LBB2_515 ## BB#514: ## %pl_dolane.1.i14059 ## in Loop: Header=BB2_349 Depth=1 vpextrb $2, %xmm1, 1(%rsi,%rax) LBB2_515: ## %pl_loopend.1.i14062 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_42(%rip), %ymm3, %ymm9 vaddps LCPI2_22(%rip), %ymm0, %ymm3 vpaddd LCPI2_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx vmovups 448(%rsp), %ymm10 ## 32-byte Reload je LBB2_517 ## BB#516: ## %pl_dolane.2.i14065 ## in Loop: Header=BB2_349 Depth=1 vpextrb $4, %xmm1, 2(%rsi,%rax) LBB2_517: ## %pl_loopend.2.i14068 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm9, %ymm4, %ymm9 vmulps %ymm3, %ymm5, %ymm5 vpslld $23, %xmm0, %xmm3 testq %r11, %r11 je LBB2_519 ## BB#518: ## %pl_dolane.3.i14071 ## in Loop: Header=BB2_349 Depth=1 vpextrb $6, %xmm1, 3(%rsi,%rax) LBB2_519: ## %pl_loopend.3.i14074 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_43(%rip), %ymm9, %ymm9 vaddps LCPI2_22(%rip), %ymm5, %ymm5 vpcmpgtd LCPI2_46(%rip), %xmm2, %xmm2 vmovups 320(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 testq %r14, %r14 je LBB2_521 ## BB#520: ## %pl_dolane.4.i14077 ## in Loop: Header=BB2_349 Depth=1 vpextrb $8, %xmm1, 4(%rsi,%rax) LBB2_521: ## %pl_loopend.4.i14080 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm9, %ymm4, %ymm8 vmovups 128(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm6, %ymm2 vmovdqa LCPI2_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm0, %xmm6, %xmm0 vmulps %ymm5, %ymm3, %ymm3 testq %r12, %r12 je LBB2_523 ## BB#522: ## %pl_dolane.5.i14083 ## in Loop: Header=BB2_349 Depth=1 vpextrb $10, %xmm1, 5(%rsi,%rax) LBB2_523: ## %pl_loopend.5.i14086 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_44(%rip), %ymm8, %ymm5 vmovups -96(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm6, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm3, %ymm2 testq %r13, %r13 je LBB2_525 ## BB#524: ## %pl_dolane.6.i14089 ## in Loop: Header=BB2_349 Depth=1 vpextrb $12, %xmm1, 6(%rsi,%rax) LBB2_525: ## %pl_loopend.6.i14091 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm5, %ymm4, %ymm3 vblendvps %ymm0, %ymm15, %ymm2, %ymm2 testb %r9b, %r9b vmovdqa 48(%rsp), %xmm6 ## 16-byte Reload jns LBB2_527 ## BB#526: ## %pl_dolane.7.i14094 ## in Loop: Header=BB2_349 Depth=1 vpextrb $14, %xmm1, 7(%rsi,%rax) LBB2_527: ## %__masked_store_i8.exit14095 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_45(%rip), %ymm3, %ymm0 vmulps LCPI2_50(%rip), %ymm2, %ymm1 vcvttps2dq %ymm1, %ymm1 vextractf128 $1, %ymm1, %xmm2 vmovdqa LCPI2_51(%rip), %xmm3 ## xmm3 = 
[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm3, %xmm2, %xmm2 vpshufb %xmm3, %xmm1, %xmm1 vpunpcklqdq %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0] testq %rbx, %rbx je LBB2_529 ## BB#528: ## %pl_dolane.i14003 ## in Loop: Header=BB2_349 Depth=1 vpextrb $0, %xmm1, (%r10,%rax) LBB2_529: ## %pl_loopend.i14006 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm0, %ymm4, %ymm0 vextractf128 $1, %ymm7, %xmm2 testq %rbp, %rbp je LBB2_531 ## BB#530: ## %pl_dolane.1.i14009 ## in Loop: Header=BB2_349 Depth=1 vpextrb $2, %xmm1, 1(%r10,%rax) LBB2_531: ## %pl_loopend.1.i14012 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_22(%rip), %ymm0, %ymm3 vpaddd LCPI2_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx vmovdqa 352(%rsp), %xmm7 ## 16-byte Reload je LBB2_533 ## BB#532: ## %pl_dolane.2.i14015 ## in Loop: Header=BB2_349 Depth=1 vpextrb $4, %xmm1, 2(%r10,%rax) LBB2_533: ## %pl_loopend.2.i14018 ## in Loop: Header=BB2_349 Depth=1 vmulps %ymm3, %ymm4, %ymm3 vpslld $23, %xmm0, %xmm4 testq %r11, %r11 je LBB2_535 ## BB#534: ## %pl_dolane.3.i14021 ## in Loop: Header=BB2_349 Depth=1 vpextrb $6, %xmm1, 3(%r10,%rax) LBB2_535: ## %pl_loopend.3.i14024 ## in Loop: Header=BB2_349 Depth=1 vaddps LCPI2_22(%rip), %ymm3, %ymm3 vpcmpgtd LCPI2_46(%rip), %xmm2, %xmm2 vinsertf128 $1, %xmm4, %ymm13, %ymm4 testq %r14, %r14 je LBB2_537 ## BB#536: ## %pl_dolane.4.i14027 ## in Loop: Header=BB2_349 Depth=1 vpextrb $8, %xmm1, 4(%r10,%rax) LBB2_537: ## %pl_loopend.4.i14030 ## in Loop: Header=BB2_349 Depth=1 vinsertf128 $1, %xmm2, %ymm11, %ymm2 vmovdqa LCPI2_47(%rip), %xmm5 ## xmm5 = [1,1,1,1] vpcmpgtd %xmm0, %xmm5, %xmm0 vmulps %ymm3, %ymm4, %ymm3 testq %r12, %r12 vmovups 480(%rsp), %ymm4 ## 32-byte Reload je LBB2_539 ## BB#538: ## %pl_dolane.5.i14033 ## in Loop: Header=BB2_349 Depth=1 vpextrb $10, %xmm1, 5(%r10,%rax) LBB2_539: ## %pl_loopend.5.i14036 ## in Loop: Header=BB2_349 Depth=1 vinsertf128 $1, %xmm0, %ymm12, %ymm0 vblendvps %ymm2, LCPI2_48(%rip), %ymm3, %ymm2 testq %r13, %r13 vxorps %xmm8, %xmm8, %xmm8 je LBB2_541 ## BB#540: ## %pl_dolane.6.i14039 ## in Loop: Header=BB2_349 Depth=1 vpextrb $12, %xmm1, 6(%r10,%rax) LBB2_541: ## %pl_loopend.6.i14041 ## in Loop: Header=BB2_349 Depth=1 vblendvps %ymm0, %ymm15, %ymm2, %ymm0 testb %r9b, %r9b vmovdqu 608(%rsp), %ymm3 ## 32-byte Reload vmovups 544(%rsp), %ymm5 ## 32-byte Reload jns LBB2_543 ## BB#542: ## %pl_dolane.7.i14044 ## in Loop: Header=BB2_349 Depth=1 vpextrb $14, %xmm1, 7(%r10,%rax) LBB2_543: ## %__masked_store_i8.exit14045 ## in Loop: Header=BB2_349 Depth=1 vmulps LCPI2_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI2_51(%rip), %xmm2 ## xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm2, %xmm1, %xmm1 vpshufb %xmm2, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] testq %rbx, %rbx je LBB2_545 ## BB#544: ## %pl_dolane.i13960 ## in Loop: Header=BB2_349 Depth=1 vpextrb $0, %xmm0, (%r8,%rax) LBB2_545: ## %pl_loopend.i13963 ## in Loop: Header=BB2_349 Depth=1 testq %rbp, %rbp je LBB2_547 ## BB#546: ## %pl_dolane.1.i13965 ## in Loop: Header=BB2_349 Depth=1 vpextrb $2, %xmm0, 1(%r8,%rax) LBB2_547: ## %pl_loopend.1.i13968 ## in Loop: Header=BB2_349 Depth=1 testq %rcx, %rcx je LBB2_549 ## BB#548: ## %pl_dolane.2.i13970 ## in Loop: Header=BB2_349 Depth=1 vpextrb $4, %xmm0, 2(%r8,%rax) LBB2_549: ## %pl_loopend.2.i13973 ## in Loop: Header=BB2_349 Depth=1 testq %r11, %r11 je LBB2_551 ## BB#550: ## %pl_dolane.3.i13975 ## in Loop: Header=BB2_349 Depth=1 vpextrb $6, %xmm0, 3(%r8,%rax) LBB2_551: ## %pl_loopend.3.i13978 ## in 
Loop: Header=BB2_349 Depth=1 testq %r14, %r14 je LBB2_553 ## BB#552: ## %pl_dolane.4.i13980 ## in Loop: Header=BB2_349 Depth=1 vpextrb $8, %xmm0, 4(%r8,%rax) LBB2_553: ## %pl_loopend.4.i13983 ## in Loop: Header=BB2_349 Depth=1 testq %r12, %r12 je LBB2_555 ## BB#554: ## %pl_dolane.5.i13985 ## in Loop: Header=BB2_349 Depth=1 vpextrb $10, %xmm0, 5(%r8,%rax) LBB2_555: ## %pl_loopend.5.i13988 ## in Loop: Header=BB2_349 Depth=1 testq %r13, %r13 je LBB2_557 ## BB#556: ## %pl_dolane.6.i13990 ## in Loop: Header=BB2_349 Depth=1 vpextrb $12, %xmm0, 6(%r8,%rax) LBB2_557: ## %pl_loopend.6.i13992 ## in Loop: Header=BB2_349 Depth=1 testb %r9b, %r9b jns LBB2_357 ## BB#356: ## %pl_dolane.7.i13994 ## in Loop: Header=BB2_349 Depth=1 vpextrb $14, %xmm0, 7(%r8,%rax) LBB2_357: ## %foreach_reset3451 ## in Loop: Header=BB2_349 Depth=1 movl 380(%rsp), %r9d ## 4-byte Reload LBB2_358: ## %foreach_reset3451 ## in Loop: Header=BB2_349 Depth=1 incl %edx cmpl -104(%rsp), %edx ## 4-byte Folded Reload jne LBB2_349 LBB2_359: ## %if_exit addq $2696, %rsp ## imm = 0xA88 popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .globl _RenderTile___uniuniREFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_ .p2align 4, 0x90 _RenderTile___uniuniREFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_: ## @RenderTile___uniuniREFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_ ## BB#0: ## %allocas pushq %rbp movq %rsp, %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx andq $-32, %rsp subq $8352, %rsp ## imm = 0x20A0 movq 8(%rdi), %rbx movq 16(%rdi), %r13 movl 24(%rdi), %eax movl %eax, 36(%rsp) ## 4-byte Spill movq 32(%rdi), %rax movq %rax, 88(%rsp) ## 8-byte Spill movq 40(%rdi), %rax movq %rax, 80(%rsp) ## 8-byte Spill movq 48(%rdi), %rax movq %rax, 72(%rsp) ## 8-byte Spill vmovaps 64(%rdi), %ymm4 vmovmskps %ymm4, %r10d xorl %edx, %edx movl %ecx, %eax divl (%rdi) movl %eax, %r14d movl %edx, %r12d shll $4, %r12d shll $4, %r14d leal 16(%r12), %esi leal 16(%r14), %r15d movl 72(%rbx), %r9d movl 76(%rbx), %eax movl %eax, 32(%rsp) ## 4-byte Spill vmovss (%rbx), %xmm0 ## xmm0 = mem[0],zero,zero,zero vmovss %xmm0, 4(%rsp) ## 4-byte Spill vmovss 20(%rbx), %xmm0 ## xmm0 = mem[0],zero,zero,zero vmovss %xmm0, 8(%rsp) ## 4-byte Spill vmovss 40(%rbx), %xmm0 ## xmm0 = mem[0],zero,zero,zero vmovss 56(%rbx), %xmm1 ## xmm1 = mem[0],zero,zero,zero movq (%r13), %r8 movq 64(%r13), %rax movq %rax, 64(%rsp) ## 8-byte Spill vmovss 64(%rbx), %xmm2 ## xmm2 = mem[0],zero,zero,zero vmovss 68(%rbx), %xmm3 ## xmm3 = mem[0],zero,zero,zero movq 72(%r13), %rax movq %rax, 56(%rsp) ## 8-byte Spill cmpl $255, %r10d movq 80(%r13), %rax movq %rax, 48(%rsp) ## 8-byte Spill movq 120(%r13), %rax movq %rax, 40(%rsp) ## 8-byte Spill vmovss %xmm1, 28(%rsp) ## 4-byte Spill vmovss %xmm0, 24(%rsp) ## 4-byte Spill movl %r15d, (%rsp) ## 4-byte Spill movl %esi, 20(%rsp) ## 4-byte Spill leaq 12(%rsp), %rax leaq 16(%rsp), %r10 jne LBB3_2 ## BB#1: ## %all_on vpcmpeqd %xmm4, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmovaps %ymm4, 96(%rsp) ## 32-byte Spill movl %r12d, %edi movl %r14d, %edx movl %r15d, %ecx movl %esi, %ebx pushq %rax pushq %r10 movl %r9d, %r15d callq _ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf addq $16, %rsp vmovss 16(%rsp), %xmm0 ## xmm0 = mem[0],zero,zero,zero vmovss 12(%rsp), %xmm1 ## xmm1 = mem[0],zero,zero,zero leaq 4224(%rsp), %rax movl %r12d, %edi movl %ebx, %esi movl 
%r14d, %edx movl (%rsp), %ecx ## 4-byte Reload movl %r15d, %r8d movl 32(%rsp), %ebx ## 4-byte Reload movl %ebx, %r9d vmovss 4(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 8(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovaps 96(%rsp), %ymm4 ## 32-byte Reload pushq %rax pushq 48(%rsp) ## 8-byte Folded Reload pushq 64(%rsp) ## 8-byte Folded Reload pushq 80(%rsp) ## 8-byte Folded Reload pushq 96(%rsp) ## 8-byte Folded Reload pushq $1024 ## imm = 0x400 callq _IntersectLightsWithTileMinMax___uniuniuniuniunfunfuniuniunfunfuniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_ addq $48, %rsp cmpl $0, 36(%rsp) ## 4-byte Folded Reload setne %cl subq $8, %rsp movzbl %cl, %r10d movl %r12d, %edi movl 28(%rsp), %esi ## 4-byte Reload movl %r14d, %edx movl 8(%rsp), %ecx ## 4-byte Reload movl %r15d, %r8d movl %ebx, %r9d vmovss 12(%rsp), %xmm0 ## 4-byte Reload ## xmm0 = mem[0],zero,zero,zero vmovss 16(%rsp), %xmm1 ## 4-byte Reload ## xmm1 = mem[0],zero,zero,zero vmovss 32(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 36(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovaps 104(%rsp), %ymm4 ## 32-byte Reload pushq 80(%rsp) ## 8-byte Folded Reload pushq 96(%rsp) ## 8-byte Folded Reload pushq 112(%rsp) ## 8-byte Folded Reload pushq %r10 pushq %rax leaq 4272(%rsp), %rax jmp LBB3_3 LBB3_2: ## %some_on movl %r12d, %edi movl %r14d, %edx movl %r15d, %ecx movl %esi, %ebx vmovaps %ymm4, 96(%rsp) ## 32-byte Spill pushq %rax pushq %r10 movl %r9d, %r15d callq _ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf addq $16, %rsp vmovss 16(%rsp), %xmm0 ## xmm0 = mem[0],zero,zero,zero vmovss 12(%rsp), %xmm1 ## xmm1 = mem[0],zero,zero,zero leaq 128(%rsp), %rax movl %r12d, %edi movl %ebx, %esi movl %r14d, %edx movl (%rsp), %ecx ## 4-byte Reload movl %r15d, %r8d movl 32(%rsp), %ebx ## 4-byte Reload movl %ebx, %r9d vmovss 4(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 8(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovaps 96(%rsp), %ymm4 ## 32-byte Reload pushq %rax pushq 48(%rsp) ## 8-byte Folded Reload pushq 64(%rsp) ## 8-byte Folded Reload pushq 80(%rsp) ## 8-byte Folded Reload pushq 96(%rsp) ## 8-byte Folded Reload pushq $1024 ## imm = 0x400 callq _IntersectLightsWithTileMinMax___uniuniuniuniunfunfuniuniunfunfuniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_ addq $48, %rsp cmpl $0, 36(%rsp) ## 4-byte Folded Reload setne %cl subq $8, %rsp movzbl %cl, %r10d movl %r12d, %edi movl 28(%rsp), %esi ## 4-byte Reload movl %r14d, %edx movl 8(%rsp), %ecx ## 4-byte Reload movl %r15d, %r8d movl %ebx, %r9d vmovss 12(%rsp), %xmm0 ## 4-byte Reload ## xmm0 = mem[0],zero,zero,zero vmovss 16(%rsp), %xmm1 ## 4-byte Reload ## xmm1 = mem[0],zero,zero,zero vmovss 32(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 36(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovaps 104(%rsp), %ymm4 ## 32-byte Reload pushq 80(%rsp) ## 8-byte Folded Reload pushq 96(%rsp) ## 8-byte Folded Reload pushq 112(%rsp) ## 8-byte Folded Reload pushq %r10 pushq %rax leaq 176(%rsp), %rax LBB3_3: ## %some_on pushq %rax pushq %r13 callq _ShadeTile___uniuniuniuniuniuniREFs_5B_unInputDataArrays_5D_unfunfunfunfun_3C_uni_3E_uniunbun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_ addq $64, %rsp leaq -40(%rbp), %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .globl 
_RenderStatic___REFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_ .p2align 4, 0x90 _RenderStatic___REFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_: ## @RenderStatic___REFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_ ## BB#0: ## %allocas pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $104, %rsp movq %r9, 56(%rsp) ## 8-byte Spill movq %r8, 48(%rsp) ## 8-byte Spill movq %rcx, 40(%rsp) ## 8-byte Spill movl %edx, %r13d movq %rsi, %rbp movq %rdi, %r14 movq $0, 24(%rsp) vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovmskps %ymm0, %eax movl %eax, 36(%rsp) ## 4-byte Spill movl 72(%r14), %eax movl 76(%r14), %ecx leal 15(%rax), %edx sarl $31, %edx shrl $28, %edx leal 15(%rax,%rdx), %r12d sarl $4, %r12d leal 15(%rcx), %eax sarl $31, %eax shrl $28, %eax leal 15(%rcx,%rax), %ebx sarl $4, %ebx movl %ebx, %r15d imull %r12d, %r15d leaq 24(%rsp), %rdi movl $96, %esi movl $32, %edx vzeroupper callq _ISPCAlloc movl %r12d, (%rax) movl %ebx, 4(%rax) movq %r14, 8(%rax) movq %rbp, 16(%rax) movl %r13d, 24(%rax) movq 40(%rsp), %rcx ## 8-byte Reload movq %rcx, 32(%rax) movq 48(%rsp), %rcx ## 8-byte Reload movq %rcx, 40(%rax) movq 56(%rsp), %rcx ## 8-byte Reload movq %rcx, 48(%rax) cmpl $255, 36(%rsp) ## 4-byte Folded Reload jne LBB4_5 ## BB#1: ## %all_on vpcmpeqd %xmm0, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm0, %ymm0 jmp LBB4_2 LBB4_5: ## %some_on vmovups 64(%rsp), %ymm0 ## 32-byte Reload LBB4_2: ## %all_on vmovaps %ymm0, 64(%rax) leaq _RenderTile___uniuniREFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_(%rip), %rsi leaq 24(%rsp), %rdi movl $1, %r8d movl $1, %r9d movq %rax, %rdx movl %r15d, %ecx vzeroupper callq _ISPCLaunch movq 24(%rsp), %rdi testq %rdi, %rdi je LBB4_4 ## BB#3: ## %call_sync callq _ISPCSync movq $0, 24(%rsp) LBB4_4: ## %post_sync addq $104, %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp retq .globl _ComputeZBoundsRow___uniuniuniuniuniun_3C_unf_3E_uniunfunfunfunfun_3C_unf_3E_un_3C_unf_3E_ .p2align 4, 0x90 _ComputeZBoundsRow___uniuniuniuniuniun_3C_unf_3E_uniunfunfunfunfun_3C_unf_3E_un_3C_unf_3E_: ## @ComputeZBoundsRow___uniuniuniuniuniun_3C_unf_3E_uniunfunfunfunfun_3C_unf_3E_un_3C_unf_3E_ ## BB#0: ## %allocas pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $104, %rsp vmovss %xmm3, 28(%rsp) ## 4-byte Spill vmovss %xmm2, 24(%rsp) ## 4-byte Spill vmovss %xmm1, 20(%rsp) ## 4-byte Spill movq %r9, 56(%rsp) ## 8-byte Spill movl %ecx, %ebp movl %edx, %ecx movl %edi, %r14d movq 176(%rsp), %r15 movq 168(%rsp), %r12 vmovmskps %ymm4, %eax cmpl $255, %eax vmovss %xmm0, 16(%rsp) ## 4-byte Spill movq %rsi, 48(%rsp) ## 8-byte Spill jne LBB5_1 ## BB#4: ## %for_test.preheader testl %ebp, %ebp jle LBB5_7 ## BB#5: ## %for_loop.lr.ph imull %ecx, %r14d addl %r14d, %ecx xorl %edi, %edi vpcmpeqd %xmm1, %xmm1, %xmm1 vinsertf128 $1, %xmm1, %ymm1, %ymm1 vmovups %ymm1, 64(%rsp) ## 32-byte Spill .p2align 4, 0x90 LBB5_6: ## %for_loop ## =>This Inner Loop Header: Depth=1 leal (%rsi,%rdi), %r13d movl %r13d, %esi movl %r14d, %edx movl %ecx, %ebx movq 56(%rsp), %r8 ## 8-byte Reload movl 160(%rsp), %r9d vmovss 20(%rsp), %xmm1 ## 4-byte Reload ## xmm1 = mem[0],zero,zero,zero vmovss 24(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 28(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovups 64(%rsp), %ymm4 ## 32-byte Reload leaq 40(%rsp), %rax pushq 
%rax leaq 52(%rsp), %rax pushq %rax callq _ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf movq 64(%rsp), %rsi ## 8-byte Reload movl %ebx, %ecx vmovss 32(%rsp), %xmm0 ## 4-byte Reload ## xmm0 = mem[0],zero,zero,zero addq $16, %rsp movl 44(%rsp), %eax movl %eax, (%r12) movl 40(%rsp), %eax movl %eax, (%r15) addq $4, %r12 addq $4, %r15 movl %r13d, %edi decl %ebp jne LBB5_6 jmp LBB5_7 LBB5_1: ## %for_test37.preheader testl %ebp, %ebp jle LBB5_7 ## BB#2: ## %for_loop39.lr.ph imull %ecx, %r14d addl %r14d, %ecx xorl %edi, %edi vmovups %ymm4, 64(%rsp) ## 32-byte Spill .p2align 4, 0x90 LBB5_3: ## %for_loop39 ## =>This Inner Loop Header: Depth=1 leal (%rsi,%rdi), %ebx movl %ebx, %esi movl %r14d, %edx movl %ecx, %r13d movq 56(%rsp), %r8 ## 8-byte Reload movl 160(%rsp), %r9d vmovss 20(%rsp), %xmm1 ## 4-byte Reload ## xmm1 = mem[0],zero,zero,zero vmovss 24(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 28(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovups 64(%rsp), %ymm4 ## 32-byte Reload leaq 32(%rsp), %rax pushq %rax leaq 44(%rsp), %rax pushq %rax callq _ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf movq 64(%rsp), %rsi ## 8-byte Reload movl %r13d, %ecx vmovss 32(%rsp), %xmm0 ## 4-byte Reload ## xmm0 = mem[0],zero,zero,zero addq $16, %rsp movl 36(%rsp), %eax movl %eax, (%r12) movl 32(%rsp), %eax movl %eax, (%r15) addq $4, %r12 addq $4, %r15 movl %ebx, %edi decl %ebp jne LBB5_3 LBB5_7: ## %for_exit addq $104, %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .section __TEXT,__literal4,4byte_literals .p2align 2 LCPI6_0: .long 1056964608 ## float 0.5 LCPI6_1: .long 1077936128 ## float 3 .section __TEXT,__const .p2align 5 LCPI6_2: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 LCPI6_3: .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .section __TEXT,__literal16,16byte_literals .p2align 4 LCPI6_4: .long 0 ## 0x0 .long 1 ## 0x1 .long 2 ## 0x2 .long 3 ## 0x3 LCPI6_5: .long 4 ## 0x4 .long 5 ## 0x5 .long 6 ## 0x6 .long 7 ## 0x7 .section __TEXT,__text,regular,pure_instructions .globl _SplitTileMinMax___uniuniun_3C_unf_3E_un_3C_unf_3E_uniuniunfunfun_3C_uni_3E_uniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_uniun_3C_uni_3E_ .p2align 4, 0x90 _SplitTileMinMax___uniuniun_3C_unf_3E_un_3C_unf_3E_uniuniunfunfun_3C_uni_3E_uniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_uniun_3C_uni_3E_: ## @SplitTileMinMax___uniuniun_3C_unf_3E_un_3C_unf_3E_uniuniunfunfun_3C_uni_3E_uniun_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_unf_3E_un_3C_uni_3E_uniun_3C_uni_3E_ ## BB#0: ## %allocas pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $200, %rsp movq %rcx, -72(%rsp) ## 8-byte Spill movq %rdx, -80(%rsp) ## 8-byte Spill movl 312(%rsp), %ecx movq 304(%rsp), %r11 movq 288(%rsp), %r10 movl 264(%rsp), %edx vcvtsi2ssl %r8d, %xmm0, %xmm3 movq 256(%rsp), %r12 vcvtsi2ssl %r9d, %xmm0, %xmm4 vmovmskps %ymm2, %eax vcvtsi2ssl %edi, %xmm0, %xmm2 leal (%rcx,%rcx), %edi vcvtsi2ssl %esi, %xmm0, %xmm5 movl %edx, %esi sarl $31, %esi shrl $29, %esi addl %edx, %esi andl $-8, %esi vmovss LCPI6_0(%rip), %xmm6 ## xmm6 = mem[0],zero,zero,zero vmulss %xmm6, %xmm3, %xmm3 vmulss %xmm6, %xmm4, %xmm4 vmulss %xmm0, %xmm3, 
%xmm0 vxorps %xmm7, %xmm7, %xmm7 vsubss %xmm0, %xmm7, %xmm0 vmulss %xmm1, %xmm4, %xmm1 vsubss %xmm3, %xmm2, %xmm2 vsubss %xmm4, %xmm5, %xmm3 vmulss %xmm0, %xmm0, %xmm4 vmulss %xmm2, %xmm2, %xmm5 vaddss %xmm4, %xmm5, %xmm4 vrsqrtss %xmm4, %xmm0, %xmm5 vmulss %xmm5, %xmm4, %xmm4 vmulss %xmm4, %xmm5, %xmm4 vmovss LCPI6_1(%rip), %xmm8 ## xmm8 = mem[0],zero,zero,zero vsubss %xmm4, %xmm8, %xmm4 vmulss %xmm4, %xmm5, %xmm4 vmulss %xmm6, %xmm4, %xmm4 vmulss %xmm1, %xmm1, %xmm5 vmulss %xmm3, %xmm3, %xmm7 vaddss %xmm7, %xmm5, %xmm5 vrsqrtss %xmm5, %xmm0, %xmm7 vmulss %xmm7, %xmm5, %xmm5 vmulss %xmm5, %xmm7, %xmm5 vsubss %xmm5, %xmm8, %xmm5 vmulss %xmm5, %xmm7, %xmm5 vmulss %xmm6, %xmm5, %xmm5 vmulss %xmm4, %xmm0, %xmm0 vmovaps %xmm0, 16(%rsp) ## 16-byte Spill vmulss %xmm5, %xmm1, %xmm0 vmovaps %xmm0, 48(%rsp) ## 16-byte Spill vmulss %xmm4, %xmm2, %xmm0 vmovaps %xmm0, (%rsp) ## 16-byte Spill vmulss %xmm5, %xmm3, %xmm0 vmovaps %xmm0, 32(%rsp) ## 16-byte Spill cmpl $255, %eax leal (%rcx,%rcx,2), %eax movl %esi, 64(%rsp) ## 4-byte Spill movl %eax, -84(%rsp) ## 4-byte Spill movl %edi, -88(%rsp) ## 4-byte Spill jne LBB6_103 ## BB#1: ## %outer_not_in_extras.preheader xorl %edx, %edx movq %rdx, -96(%rsp) ## 8-byte Spill movq %rax, -112(%rsp) ## 8-byte Spill movl %edi, %r13d movq %rcx, -104(%rsp) ## 8-byte Spill movl $0, %r9d testl %esi, %esi movq -80(%rsp), %rsi ## 8-byte Reload jle LBB6_100 ## BB#2: ## %foreach_full_body.lr.ph vbroadcastss (%rsi), %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 160(%rsp) ## 32-byte Spill vpermilps $0, 32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 128(%rsp) ## 32-byte Spill vpermilps $0, 48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 96(%rsp) ## 32-byte Spill xorl %r8d, %r8d movl -84(%rsp), %eax ## 4-byte Reload movq %rax, -112(%rsp) ## 8-byte Spill movl -88(%rsp), %eax ## 4-byte Reload movl %eax, %r13d movl 312(%rsp), %eax movq %rax, -104(%rsp) ## 8-byte Spill xorl %eax, %eax movq %rax, -96(%rsp) ## 8-byte Spill xorl %r9d, %r9d .p2align 4, 0x90 LBB6_3: ## %foreach_full_body ## =>This Inner Loop Header: Depth=1 movslq %r8d, %rax vmovdqu (%r12,%rax), %xmm12 movq %r12, %r10 vmovdqu 16(%r12,%rax), %xmm8 vpslld $2, %xmm8, %xmm1 vpslld $2, %xmm12, %xmm2 vmovq %xmm2, %rax movslq %eax, %r12 vpextrq $1, %xmm2, %rbx movslq %ebx, %r15 sarq $32, %rbx sarq $32, %rax vmovq %xmm1, %rsi movslq %esi, %rcx vpextrq $1, %xmm1, %rdi movslq %edi, %rdx sarq $32, %rdi sarq $32, %rsi movq 272(%rsp), %rbp vmovss (%rbp,%rcx), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rbp,%rdx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rbp,%rdi), %xmm1, %xmm10 ## xmm10 = xmm1[0,1,2],mem[0] vmovss (%rbp,%r12), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rbp,%r15), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rbp,%rbx), %xmm1, %xmm13 ## xmm13 = xmm1[0,1,2],mem[0] movq 280(%rsp), %r14 vmovss (%r14,%rcx), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r14,%rsi), %xmm1, %xmm1 ## xmm1 = 
xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r14,%rdx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r14,%rdi), %xmm1, %xmm11 ## xmm11 = xmm1[0,1,2],mem[0] vmovss (%r14,%r12), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r14,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r14,%r15), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r14,%rbx), %xmm1, %xmm5 ## xmm5 = xmm1[0,1,2],mem[0] movq 288(%rsp), %rbp vmovss (%rbp,%rcx), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rbp,%rdx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rbp,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%rbp,%r12), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rax), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rbp,%r15), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rbp,%rbx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm2, %ymm3 movq 296(%rsp), %rbp vmovss (%rbp,%rcx), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rbp,%rdx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rbp,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%rbp,%r12), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rax), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rbp,%r15), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rbp,%rbx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm2, %ymm9 vxorps %ymm0, %ymm0, %ymm0 vsubps %ymm9, %ymm0, %ymm6 vsubps -32(%rsp), %ymm3, %ymm7 ## 32-byte Folded Reload vcmpnltps %ymm6, %ymm7, %ymm1 vmovaps LCPI6_2(%rip), %ymm2 ## ymm2 = [1,1,1,1,1,1,1,1] vxorps %ymm2, %ymm1, %ymm14 vpslld $31, %xmm14, %xmm0 vpsrad $31, %xmm0, %xmm0 vextractf128 $1, %ymm14, %xmm4 vpslld $31, %xmm4, %xmm4 vpsrad $31, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm0, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax movq -72(%rsp), %rcx ## 8-byte Reload je LBB6_5 ## BB#4: ## %eval_1 ## in Loop: Header=BB6_3 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss (%rcx), %ymm1 vsubps %ymm3, %ymm1, %ymm1 vcmpnltps %ymm6, %ymm1, %ymm1 vandps %ymm1, %ymm0, %ymm1 LBB6_5: ## %logical_op_done ## in Loop: Header=BB6_3 Depth=1 movq -80(%rsp), %rsi ## 8-byte Reload vbroadcastss 4(%rsi), %ymm0 vsubps %ymm0, %ymm3, %ymm7 vcmpnltps %ymm6, %ymm7, %ymm15 vxorps %ymm2, %ymm15, %ymm0 vpslld $31, %xmm0, %xmm4 vpsrad $31, %xmm4, %xmm4 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm4, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB6_7 ## BB#6: ## %eval_1128 ## in Loop: Header=BB6_3 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss 4(%rcx), %ymm4 vsubps %ymm3, %ymm4, %ymm4 vcmpnltps %ymm6, %ymm4, %ymm4 vandps %ymm4, %ymm0, %ymm15 LBB6_7: ## %logical_op_done129 ## in Loop: Header=BB6_3 Depth=1 vbroadcastss 8(%rsi), %ymm0 vsubps %ymm0, %ymm3, %ymm7 vcmpnltps %ymm6, %ymm7, %ymm14 vxorps %ymm2, %ymm14, %ymm0 vpslld $31, %xmm0, %xmm4 vpsrad $31, %xmm4, %xmm4 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm4, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax movq %r10, %r12 movl 64(%rsp), %edx ## 4-byte Reload je LBB6_9 ## BB#8: ## %eval_1158 ## in Loop: Header=BB6_3 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss 8(%rcx), %ymm4 vsubps 
%ymm3, %ymm4, %ymm4 vcmpnltps %ymm6, %ymm4, %ymm4 vandps %ymm4, %ymm0, %ymm14 LBB6_9: ## %logical_op_done159 ## in Loop: Header=BB6_3 Depth=1 vinsertf128 $1, %xmm10, %ymm13, %ymm7 vinsertf128 $1, %xmm11, %ymm5, %ymm4 vbroadcastss 12(%rsi), %ymm0 vsubps %ymm0, %ymm3, %ymm5 vcmpnltps %ymm6, %ymm5, %ymm13 vxorps %ymm2, %ymm13, %ymm0 vpslld $31, %xmm0, %xmm2 vpsrad $31, %xmm2, %xmm2 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm2, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB6_11 ## BB#10: ## %eval_1188 ## in Loop: Header=BB6_3 Depth=1 vcmpnltps %ymm6, %ymm5, %ymm0 vbroadcastss 12(%rcx), %ymm2 vsubps %ymm3, %ymm2, %ymm2 vcmpnltps %ymm6, %ymm2, %ymm2 vandps %ymm2, %ymm0, %ymm13 LBB6_11: ## %cif_mask_all ## in Loop: Header=BB6_3 Depth=1 vmulps -64(%rsp), %ymm3, %ymm0 ## 32-byte Folded Reload vmulps 160(%rsp), %ymm7, %ymm2 ## 32-byte Folded Reload vaddps %ymm0, %ymm2, %ymm5 vmulps 128(%rsp), %ymm3, %ymm0 ## 32-byte Folded Reload vmulps 96(%rsp), %ymm4, %ymm2 ## 32-byte Folded Reload vaddps %ymm0, %ymm2, %ymm3 vmovaps LCPI6_3(%rip), %ymm7 ## ymm7 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] vandps %ymm7, %ymm5, %ymm0 vcmpnleps %ymm9, %ymm0, %ymm4 vmovmskps %ymm4, %eax vxorps %ymm6, %ymm6, %ymm6 testl %eax, %eax je LBB6_15 ## BB#12: ## %cif_mask_all ## in Loop: Header=BB6_3 Depth=1 cmpl $255, %eax jne LBB6_14 ## BB#13: ## %cif_test_all ## in Loop: Header=BB6_3 Depth=1 vcmpnleps %ymm6, %ymm5, %ymm0 vandps %ymm1, %ymm0, %ymm1 vcmpleps %ymm6, %ymm5, %ymm2 vandps %ymm15, %ymm2, %ymm15 vandps %ymm14, %ymm0, %ymm14 vandps %ymm13, %ymm2, %ymm13 jmp LBB6_15 .p2align 4, 0x90 LBB6_14: ## %cif_test_mixed ## in Loop: Header=BB6_3 Depth=1 vcmpnleps %ymm6, %ymm5, %ymm0 vblendvps %ymm0, %ymm1, %ymm6, %ymm2 vblendvps %ymm4, %ymm2, %ymm1, %ymm1 vblendvps %ymm0, %ymm6, %ymm15, %ymm2 vblendvps %ymm4, %ymm2, %ymm15, %ymm15 vblendvps %ymm0, %ymm14, %ymm6, %ymm2 vblendvps %ymm4, %ymm2, %ymm14, %ymm14 vblendvps %ymm0, %ymm6, %ymm13, %ymm0 vblendvps %ymm4, %ymm0, %ymm13, %ymm13 LBB6_15: ## %cif_mask_all369 ## in Loop: Header=BB6_3 Depth=1 vandps %ymm7, %ymm3, %ymm0 vcmpnleps %ymm9, %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB6_19 ## BB#16: ## %cif_mask_all369 ## in Loop: Header=BB6_3 Depth=1 cmpl $255, %eax jne LBB6_18 ## BB#17: ## %cif_test_all378 ## in Loop: Header=BB6_3 Depth=1 vcmpnleps %ymm6, %ymm3, %ymm0 vandps %ymm1, %ymm0, %ymm1 vandps %ymm15, %ymm0, %ymm15 vcmpleps %ymm6, %ymm3, %ymm0 vandps %ymm14, %ymm0, %ymm14 vandps %ymm13, %ymm0, %ymm13 jmp LBB6_19 .p2align 4, 0x90 LBB6_18: ## %cif_test_mixed416 ## in Loop: Header=BB6_3 Depth=1 vcmpnleps %ymm6, %ymm3, %ymm0 vblendvps %ymm0, %ymm1, %ymm6, %ymm3 vblendvps %ymm2, %ymm3, %ymm1, %ymm1 vblendvps %ymm0, %ymm15, %ymm6, %ymm3 vblendvps %ymm2, %ymm3, %ymm15, %ymm15 vblendvps %ymm0, %ymm6, %ymm14, %ymm3 vblendvps %ymm2, %ymm3, %ymm14, %ymm14 vblendvps %ymm0, %ymm6, %ymm13, %ymm0 vblendvps %ymm2, %ymm0, %ymm13, %ymm13 LBB6_19: ## %cif_mask_all524 ## in Loop: Header=BB6_3 Depth=1 vinsertf128 $1, %xmm8, %ymm12, %ymm12 vmovmskps %ymm1, %eax testl %eax, %eax je LBB6_39 ## BB#20: ## %cif_mask_all524 ## in Loop: Header=BB6_3 Depth=1 cmpl $255, %eax jne LBB6_22 ## BB#21: ## %packed_store_active___un_3C_uni_3E_vyi.exit6520 ## in Loop: Header=BB6_3 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -96(%rsp) ## 8-byte Spill jmp LBB6_39 .p2align 4, 0x90 
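## NOTE (added annotation, not compiler output): LBB6_22 below is the
## mixed-mask arm of ISPC's packed_store_active. Each of the eight mask
## bits in %al is tested in turn; for every set bit one lane of %ymm12
## is written out (vmovd/vpextrd for the low half, vextractf128 first
## for the high half) while %rbx advances as the compaction cursor, and
## the lane count is finally folded into the output offset spilled at
## -96(%rsp). The all-on fast path above (BB#21) instead stores the
## whole vector and bumps the offset by 8. The same unrolled chain
## repeats for the three remaining output streams (-104(%rsp), %r13d,
## -112(%rsp)).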
LBB6_22: ## %cif_test_mixed556 ## in Loop: Header=BB6_3 Depth=1 movq -96(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r11,%rcx,4), %r15 xorl %ebx, %ebx testb $1, %al je LBB6_24 ## BB#23: ## %store.i.i6568 ## in Loop: Header=BB6_3 Depth=1 vmovd %xmm12, (%r15) movl $1, %ebx LBB6_24: ## %loopend.i.i6573 ## in Loop: Header=BB6_3 Depth=1 testb $2, %al je LBB6_26 ## BB#25: ## %store.i.i6568.1 ## in Loop: Header=BB6_3 Depth=1 vpextrd $1, %xmm12, (%r15,%rbx,4) incl %ebx LBB6_26: ## %loopend.i.i6573.1 ## in Loop: Header=BB6_3 Depth=1 testb $4, %al je LBB6_28 ## BB#27: ## %store.i.i6568.2 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $2, %xmm12, (%r15,%rbx,4) incl %ebx LBB6_28: ## %loopend.i.i6573.2 ## in Loop: Header=BB6_3 Depth=1 testb $8, %al je LBB6_30 ## BB#29: ## %store.i.i6568.3 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $3, %xmm12, (%r15,%rbx,4) incl %ebx LBB6_30: ## %loopend.i.i6573.3 ## in Loop: Header=BB6_3 Depth=1 testb $16, %al je LBB6_32 ## BB#31: ## %store.i.i6568.4 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vmovd %xmm0, (%r15,%rbx,4) incl %ebx LBB6_32: ## %loopend.i.i6573.4 ## in Loop: Header=BB6_3 Depth=1 testb $32, %al je LBB6_34 ## BB#33: ## %store.i.i6568.5 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $1, %xmm0, (%r15,%rbx,4) incl %ebx LBB6_34: ## %loopend.i.i6573.5 ## in Loop: Header=BB6_3 Depth=1 testb $64, %al je LBB6_36 ## BB#35: ## %store.i.i6568.6 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $2, %xmm0, (%r15,%rbx,4) incl %ebx LBB6_36: ## %loopend.i.i6573.6 ## in Loop: Header=BB6_3 Depth=1 testb %al, %al jns LBB6_38 ## BB#37: ## %store.i.i6568.7 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $3, %xmm0, (%r15,%rbx,4) incl %ebx LBB6_38: ## %loopend.i.i6573.7 ## in Loop: Header=BB6_3 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload addl %eax, %ebx movq %rbx, -96(%rsp) ## 8-byte Spill LBB6_39: ## %cif_mask_all632 ## in Loop: Header=BB6_3 Depth=1 vmovmskps %ymm15, %eax testl %eax, %eax je LBB6_59 ## BB#40: ## %cif_mask_all632 ## in Loop: Header=BB6_3 Depth=1 cmpl $255, %eax jne LBB6_42 ## BB#41: ## %packed_store_active___un_3C_uni_3E_vyi.exit6686 ## in Loop: Header=BB6_3 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -104(%rsp) ## 8-byte Spill jmp LBB6_59 .p2align 4, 0x90 LBB6_42: ## %cif_test_mixed665 ## in Loop: Header=BB6_3 Depth=1 movq -104(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r11,%rcx,4), %rbp xorl %ebx, %ebx testb $1, %al je LBB6_44 ## BB#43: ## %store.i.i6734 ## in Loop: Header=BB6_3 Depth=1 vmovd %xmm12, (%rbp) movl $1, %ebx LBB6_44: ## %loopend.i.i6739 ## in Loop: Header=BB6_3 Depth=1 testb $2, %al je LBB6_46 ## BB#45: ## %store.i.i6734.1 ## in Loop: Header=BB6_3 Depth=1 vpextrd $1, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_46: ## %loopend.i.i6739.1 ## in Loop: Header=BB6_3 Depth=1 testb $4, %al je LBB6_48 ## BB#47: ## %store.i.i6734.2 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $2, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_48: ## %loopend.i.i6739.2 ## in Loop: Header=BB6_3 Depth=1 testb $8, %al je LBB6_50 ## BB#49: ## %store.i.i6734.3 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $3, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_50: ## %loopend.i.i6739.3 ## in Loop: Header=BB6_3 Depth=1 testb $16, %al je LBB6_52 ## BB#51: ## %store.i.i6734.4 
## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vmovd %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_52: ## %loopend.i.i6739.4 ## in Loop: Header=BB6_3 Depth=1 testb $32, %al je LBB6_54 ## BB#53: ## %store.i.i6734.5 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $1, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_54: ## %loopend.i.i6739.5 ## in Loop: Header=BB6_3 Depth=1 testb $64, %al je LBB6_56 ## BB#55: ## %store.i.i6734.6 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $2, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_56: ## %loopend.i.i6739.6 ## in Loop: Header=BB6_3 Depth=1 testb %al, %al jns LBB6_58 ## BB#57: ## %store.i.i6734.7 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $3, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_58: ## %loopend.i.i6739.7 ## in Loop: Header=BB6_3 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload addl %eax, %ebx movq %rbx, -104(%rsp) ## 8-byte Spill LBB6_59: ## %cif_mask_all741 ## in Loop: Header=BB6_3 Depth=1 vmovmskps %ymm14, %eax testl %eax, %eax je LBB6_79 ## BB#60: ## %cif_mask_all741 ## in Loop: Header=BB6_3 Depth=1 cmpl $255, %eax jne LBB6_62 ## BB#61: ## %packed_store_active___un_3C_uni_3E_vyi.exit6895 ## in Loop: Header=BB6_3 Depth=1 movslq %r13d, %r13 vextractf128 $1, %ymm12, 16(%r11,%r13,4) vmovups %xmm12, (%r11,%r13,4) addl $8, %r13d jmp LBB6_79 .p2align 4, 0x90 LBB6_62: ## %cif_test_mixed774 ## in Loop: Header=BB6_3 Depth=1 movslq %r13d, %rcx leaq (%r11,%rcx,4), %rbp xorl %ebx, %ebx testb $1, %al je LBB6_64 ## BB#63: ## %store.i.i6943 ## in Loop: Header=BB6_3 Depth=1 vmovd %xmm12, (%rbp) movl $1, %ebx LBB6_64: ## %loopend.i.i6948 ## in Loop: Header=BB6_3 Depth=1 testb $2, %al je LBB6_66 ## BB#65: ## %store.i.i6943.1 ## in Loop: Header=BB6_3 Depth=1 vpextrd $1, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_66: ## %loopend.i.i6948.1 ## in Loop: Header=BB6_3 Depth=1 testb $4, %al je LBB6_68 ## BB#67: ## %store.i.i6943.2 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $2, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_68: ## %loopend.i.i6948.2 ## in Loop: Header=BB6_3 Depth=1 testb $8, %al je LBB6_70 ## BB#69: ## %store.i.i6943.3 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $3, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_70: ## %loopend.i.i6948.3 ## in Loop: Header=BB6_3 Depth=1 testb $16, %al je LBB6_72 ## BB#71: ## %store.i.i6943.4 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vmovd %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_72: ## %loopend.i.i6948.4 ## in Loop: Header=BB6_3 Depth=1 testb $32, %al je LBB6_74 ## BB#73: ## %store.i.i6943.5 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $1, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_74: ## %loopend.i.i6948.5 ## in Loop: Header=BB6_3 Depth=1 testb $64, %al je LBB6_76 ## BB#75: ## %store.i.i6943.6 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $2, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_76: ## %loopend.i.i6948.6 ## in Loop: Header=BB6_3 Depth=1 testb %al, %al jns LBB6_78 ## BB#77: ## %store.i.i6943.7 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $3, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_78: ## %loopend.i.i6948.7 ## in Loop: Header=BB6_3 Depth=1 addl %r13d, %ebx movl %ebx, %r13d LBB6_79: ## %cif_mask_all850 ## in Loop: Header=BB6_3 Depth=1 vmovmskps %ymm13, %eax cmpl $255, %eax je LBB6_98 ## BB#80: ## %cif_mask_all850 ## in Loop: Header=BB6_3 
Depth=1 testl %eax, %eax je LBB6_99 ## BB#81: ## %cif_test_mixed883 ## in Loop: Header=BB6_3 Depth=1 movq -112(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r11,%rcx,4), %rbp xorl %ebx, %ebx testb $1, %al je LBB6_83 ## BB#82: ## %store.i.i7109 ## in Loop: Header=BB6_3 Depth=1 vmovd %xmm12, (%rbp) movl $1, %ebx LBB6_83: ## %loopend.i.i7114 ## in Loop: Header=BB6_3 Depth=1 testb $2, %al je LBB6_85 ## BB#84: ## %store.i.i7109.1 ## in Loop: Header=BB6_3 Depth=1 vpextrd $1, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_85: ## %loopend.i.i7114.1 ## in Loop: Header=BB6_3 Depth=1 testb $4, %al je LBB6_87 ## BB#86: ## %store.i.i7109.2 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $2, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_87: ## %loopend.i.i7114.2 ## in Loop: Header=BB6_3 Depth=1 testb $8, %al je LBB6_89 ## BB#88: ## %store.i.i7109.3 ## in Loop: Header=BB6_3 Depth=1 movslq %ebx, %rbx vpextrd $3, %xmm12, (%rbp,%rbx,4) incl %ebx LBB6_89: ## %loopend.i.i7114.3 ## in Loop: Header=BB6_3 Depth=1 testb $16, %al je LBB6_91 ## BB#90: ## %store.i.i7109.4 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vmovd %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_91: ## %loopend.i.i7114.4 ## in Loop: Header=BB6_3 Depth=1 testb $32, %al je LBB6_93 ## BB#92: ## %store.i.i7109.5 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $1, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_93: ## %loopend.i.i7114.5 ## in Loop: Header=BB6_3 Depth=1 testb $64, %al je LBB6_95 ## BB#94: ## %store.i.i7109.6 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $2, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_95: ## %loopend.i.i7114.6 ## in Loop: Header=BB6_3 Depth=1 testb %al, %al jns LBB6_97 ## BB#96: ## %store.i.i7109.7 ## in Loop: Header=BB6_3 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %ebx, %rbx vpextrd $3, %xmm0, (%rbp,%rbx,4) incl %ebx LBB6_97: ## %loopend.i.i7114.7 ## in Loop: Header=BB6_3 Depth=1 movq -112(%rsp), %rax ## 8-byte Reload addl %eax, %ebx movq %rbx, -112(%rsp) ## 8-byte Spill jmp LBB6_99 .p2align 4, 0x90 LBB6_98: ## %packed_store_active___un_3C_uni_3E_vyi.exit7061 ## in Loop: Header=BB6_3 Depth=1 movq -112(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -112(%rsp) ## 8-byte Spill LBB6_99: ## %cif_done852 ## in Loop: Header=BB6_3 Depth=1 addl $8, %r9d addl $32, %r8d cmpl %edx, %r9d jl LBB6_3 LBB6_100: ## %partial_inner_all_outer movl 264(%rsp), %eax cmpl %eax, %r9d jge LBB6_205 ## BB#101: ## %partial_inner_only movq %r11, %r14 vmovd %r9d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI6_4(%rip), %xmm0, %xmm1 vpaddd LCPI6_5(%rip), %xmm0, %xmm10 vmovd %eax, %xmm0 vpshufd $0, %xmm0, %xmm8 ## xmm8 = xmm0[0,0,0,0] vpcmpgtd %xmm10, %xmm8, %xmm6 vmovdqu %ymm1, 64(%rsp) ## 32-byte Spill vpcmpgtd %xmm1, %xmm8, %xmm3 vinsertf128 $1, %xmm6, %ymm3, %ymm0 shll $2, %r9d movslq %r9d, %rax vmaskmovps (%r12,%rax), %ymm0, %ymm2 vpslld $2, %xmm2, %xmm1 vmovups %ymm2, -32(%rsp) ## 32-byte Spill vextractf128 $1, %ymm2, %xmm2 vmovaps %xmm2, -64(%rsp) ## 16-byte Spill vpslld $2, %xmm2, %xmm2 vinsertf128 $1, %xmm2, %ymm1, %ymm1 vxorps %ymm2, %ymm2, %ymm2 vblendvps %ymm0, %ymm1, %ymm2, %ymm1 vpextrq $1, %xmm1, %rax movslq %eax, %r11 sarq $32, %rax movq %rsi, %rcx vmovq %xmm1, %rdi movslq %edi, %r15 sarq $32, %rdi vextractf128 $1, %ymm1, %xmm1 vpextrq $1, %xmm1, %rbx movslq %ebx, %rsi sarq $32, %rbx vmovq %xmm1, %rbp movslq %ebp, %r8 sarq $32, %rbp movq 
288(%rsp), %rdx vmovss (%rdx,%r8), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%rsi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rbx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%rdx,%r15), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rdi), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rdx,%r11), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rdx,%rax), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm5, %ymm1 movq 296(%rsp), %rdx vmovss (%rdx,%r8), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rdx,%rsi), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rdx,%rbx), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vmovss (%rdx,%r15), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rdi), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rdx,%r11), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rdx,%rax), %xmm7, %xmm7 ## xmm7 = xmm7[0,1,2],mem[0] vinsertf128 $1, %xmm5, %ymm7, %ymm12 vsubps %ymm12, %ymm2, %ymm2 vbroadcastss (%rcx), %ymm5 vsubps %ymm5, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm5 vandps %ymm0, %ymm5, %ymm5 vextractf128 $1, %ymm5, %xmm7 vpcmpeqd %xmm6, %xmm7, %xmm7 movq 272(%rsp), %rcx vmovss (%rcx,%r8), %xmm6 ## xmm6 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rbp), %xmm6, %xmm6 ## xmm6 = xmm6[0],mem[0],xmm6[2,3] vinsertps $32, (%rcx,%rsi), %xmm6, %xmm6 ## xmm6 = xmm6[0,1],mem[0],xmm6[3] vinsertps $48, (%rcx,%rbx), %xmm6, %xmm6 ## xmm6 = xmm6[0,1,2],mem[0] vpcmpeqd %xmm3, %xmm5, %xmm5 vmovss (%rcx,%r15), %xmm3 ## xmm3 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rdi), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertps $32, (%rcx,%r11), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] vinsertps $48, (%rcx,%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] vinsertf128 $1, %xmm7, %ymm5, %ymm7 movq 280(%rsp), %rcx movq %rcx, %rdx vmovss (%rdx,%r8), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rdx,%rsi), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rdx,%rbx), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vmovmskps %ymm7, %ecx vmovss (%rdx,%r15), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rdi), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rdx,%r11), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rdx,%rax), %xmm7, %xmm15 ## xmm15 = xmm7[0,1,2],mem[0] cmpl $255, %ecx jne LBB6_207 ## BB#102: vcmpnltps %ymm2, %ymm9, %ymm11 movq -72(%rsp), %rcx ## 8-byte Reload jmp LBB6_208 LBB6_103: ## %outer_not_in_extras2019.preheader xorl %r8d, %r8d movq %rax, -112(%rsp) ## 8-byte Spill movl %edi, %r14d movq %rcx, -104(%rsp) ## 8-byte Spill movl $0, %eax movq %rax, -96(%rsp) ## 8-byte Spill testl %esi, %esi movq -80(%rsp), %rdi ## 8-byte Reload jle LBB6_202 ## BB#104: ## %foreach_full_body1999.lr.ph vbroadcastss (%rdi), %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 160(%rsp) ## 32-byte Spill vpermilps $0, 32(%rsp), %xmm0 ## 16-byte 
Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 128(%rsp) ## 32-byte Spill vpermilps $0, 48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 96(%rsp) ## 32-byte Spill xorl %r9d, %r9d movl -84(%rsp), %eax ## 4-byte Reload movq %rax, -112(%rsp) ## 8-byte Spill movl -88(%rsp), %eax ## 4-byte Reload movl %eax, %r14d movl 312(%rsp), %eax movq %rax, -104(%rsp) ## 8-byte Spill xorl %r8d, %r8d xorl %eax, %eax movq %rax, -96(%rsp) ## 8-byte Spill .p2align 4, 0x90 LBB6_105: ## %foreach_full_body1999 ## =>This Inner Loop Header: Depth=1 movq %r11, %rcx movslq %r9d, %rax vmovdqu (%r12,%rax), %xmm12 vmovdqu 16(%r12,%rax), %xmm8 vpslld $2, %xmm8, %xmm1 vpslld $2, %xmm12, %xmm2 vmovq %xmm2, %rax movslq %eax, %r15 vpextrq $1, %xmm2, %rsi movslq %esi, %rbx sarq $32, %rsi sarq $32, %rax vmovq %xmm1, %rbp movslq %ebp, %r11 vpextrq $1, %xmm1, %rdi movslq %edi, %r13 sarq $32, %rdi sarq $32, %rbp movq 272(%rsp), %rdx vmovss (%rdx,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%r13), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rdi), %xmm1, %xmm10 ## xmm10 = xmm1[0,1,2],mem[0] vmovss (%rdx,%r15), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%rbx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rsi), %xmm1, %xmm13 ## xmm13 = xmm1[0,1,2],mem[0] movq 280(%rsp), %rdx vmovss (%rdx,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%r13), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rdi), %xmm1, %xmm11 ## xmm11 = xmm1[0,1,2],mem[0] vmovss (%rdx,%r15), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%rbx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rsi), %xmm1, %xmm5 ## xmm5 = xmm1[0,1,2],mem[0] vmovss (%r10,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r10,%r13), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r10,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%r10,%r15), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rax), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%r10,%rbx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%r10,%rsi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm2, %ymm3 movq 296(%rsp), %rdx vmovss (%rdx,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rdx,%r13), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rdx,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%rdx,%r15), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rax), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rdx,%rbx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rdx,%rsi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm2, %ymm9 vxorps %ymm0, %ymm0, %ymm0 vsubps %ymm9, %ymm0, %ymm6 vsubps -32(%rsp), %ymm3, %ymm7 ## 32-byte Folded Reload vcmpnltps %ymm6, %ymm7, %ymm1 vmovaps LCPI6_2(%rip), %ymm2 ## ymm2 = 
[1,1,1,1,1,1,1,1] vxorps %ymm2, %ymm1, %ymm14 vpslld $31, %xmm14, %xmm0 vpsrad $31, %xmm0, %xmm0 vextractf128 $1, %ymm14, %xmm4 vpslld $31, %xmm4, %xmm4 vpsrad $31, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm0, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax movq -72(%rsp), %rsi ## 8-byte Reload je LBB6_107 ## BB#106: ## %eval_12087 ## in Loop: Header=BB6_105 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss (%rsi), %ymm1 vsubps %ymm3, %ymm1, %ymm1 vcmpnltps %ymm6, %ymm1, %ymm1 vandps %ymm1, %ymm0, %ymm1 LBB6_107: ## %logical_op_done2088 ## in Loop: Header=BB6_105 Depth=1 movq -80(%rsp), %rdi ## 8-byte Reload vbroadcastss 4(%rdi), %ymm0 vsubps %ymm0, %ymm3, %ymm7 vcmpnltps %ymm6, %ymm7, %ymm15 vxorps %ymm2, %ymm15, %ymm0 vpslld $31, %xmm0, %xmm4 vpsrad $31, %xmm4, %xmm4 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm4, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB6_109 ## BB#108: ## %eval_12117 ## in Loop: Header=BB6_105 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss 4(%rsi), %ymm4 vsubps %ymm3, %ymm4, %ymm4 vcmpnltps %ymm6, %ymm4, %ymm4 vandps %ymm4, %ymm0, %ymm15 LBB6_109: ## %logical_op_done2118 ## in Loop: Header=BB6_105 Depth=1 vbroadcastss 8(%rdi), %ymm0 vsubps %ymm0, %ymm3, %ymm7 vcmpnltps %ymm6, %ymm7, %ymm14 vxorps %ymm2, %ymm14, %ymm0 vpslld $31, %xmm0, %xmm4 vpsrad $31, %xmm4, %xmm4 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm4, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax movq %rcx, %r11 je LBB6_111 ## BB#110: ## %eval_12147 ## in Loop: Header=BB6_105 Depth=1 vcmpnltps %ymm6, %ymm7, %ymm0 vbroadcastss 8(%rsi), %ymm4 vsubps %ymm3, %ymm4, %ymm4 vcmpnltps %ymm6, %ymm4, %ymm4 vandps %ymm4, %ymm0, %ymm14 LBB6_111: ## %logical_op_done2148 ## in Loop: Header=BB6_105 Depth=1 vinsertf128 $1, %xmm10, %ymm13, %ymm7 vinsertf128 $1, %xmm11, %ymm5, %ymm4 vbroadcastss 12(%rdi), %ymm0 vsubps %ymm0, %ymm3, %ymm5 vcmpnltps %ymm6, %ymm5, %ymm13 vxorps %ymm2, %ymm13, %ymm0 vpslld $31, %xmm0, %xmm2 vpsrad $31, %xmm2, %xmm2 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm2, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB6_113 ## BB#112: ## %eval_12177 ## in Loop: Header=BB6_105 Depth=1 vcmpnltps %ymm6, %ymm5, %ymm0 vbroadcastss 12(%rsi), %ymm2 vsubps %ymm3, %ymm2, %ymm2 vcmpnltps %ymm6, %ymm2, %ymm2 vandps %ymm2, %ymm0, %ymm13 LBB6_113: ## %cif_mask_all2224 ## in Loop: Header=BB6_105 Depth=1 vmulps -64(%rsp), %ymm3, %ymm0 ## 32-byte Folded Reload vmulps 160(%rsp), %ymm7, %ymm2 ## 32-byte Folded Reload vaddps %ymm0, %ymm2, %ymm5 vmulps 128(%rsp), %ymm3, %ymm0 ## 32-byte Folded Reload vmulps 96(%rsp), %ymm4, %ymm2 ## 32-byte Folded Reload vaddps %ymm0, %ymm2, %ymm3 vmovaps LCPI6_3(%rip), %ymm7 ## ymm7 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] vandps %ymm7, %ymm5, %ymm0 vcmpnleps %ymm9, %ymm0, %ymm4 vmovmskps %ymm4, %eax vxorps %ymm6, %ymm6, %ymm6 testl %eax, %eax je LBB6_117 ## BB#114: ## %cif_mask_all2224 ## in Loop: Header=BB6_105 Depth=1 cmpl $255, %eax jne LBB6_116 ## BB#115: ## %cif_test_all2233 ## in Loop: Header=BB6_105 Depth=1 vcmpnleps %ymm6, %ymm5, %ymm0 vandps %ymm1, %ymm0, %ymm1 vcmpleps %ymm6, %ymm5, %ymm2 vandps %ymm15, %ymm2, %ymm15 vandps %ymm14, %ymm0, %ymm14 vandps %ymm13, %ymm2, %ymm13 jmp LBB6_117 .p2align 4, 0x90 LBB6_116: ## %cif_test_mixed2273 ## in Loop: Header=BB6_105 Depth=1 vcmpnleps %ymm6, %ymm5, %ymm0 vblendvps %ymm0, %ymm1, %ymm6, %ymm2 
vblendvps %ymm4, %ymm2, %ymm1, %ymm1 vblendvps %ymm0, %ymm6, %ymm15, %ymm2 vblendvps %ymm4, %ymm2, %ymm15, %ymm15 vblendvps %ymm0, %ymm14, %ymm6, %ymm2 vblendvps %ymm4, %ymm2, %ymm14, %ymm14 vblendvps %ymm0, %ymm6, %ymm13, %ymm0 vblendvps %ymm4, %ymm0, %ymm13, %ymm13 LBB6_117: ## %cif_mask_all2384 ## in Loop: Header=BB6_105 Depth=1 vandps %ymm7, %ymm3, %ymm0 vcmpnleps %ymm9, %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB6_121 ## BB#118: ## %cif_mask_all2384 ## in Loop: Header=BB6_105 Depth=1 cmpl $255, %eax jne LBB6_120 ## BB#119: ## %cif_test_all2393 ## in Loop: Header=BB6_105 Depth=1 vcmpnleps %ymm6, %ymm3, %ymm0 vandps %ymm1, %ymm0, %ymm1 vandps %ymm15, %ymm0, %ymm15 vcmpleps %ymm6, %ymm3, %ymm0 vandps %ymm14, %ymm0, %ymm14 vandps %ymm13, %ymm0, %ymm13 jmp LBB6_121 .p2align 4, 0x90 LBB6_120: ## %cif_test_mixed2433 ## in Loop: Header=BB6_105 Depth=1 vcmpnleps %ymm6, %ymm3, %ymm0 vblendvps %ymm0, %ymm1, %ymm6, %ymm3 vblendvps %ymm2, %ymm3, %ymm1, %ymm1 vblendvps %ymm0, %ymm15, %ymm6, %ymm3 vblendvps %ymm2, %ymm3, %ymm15, %ymm15 vblendvps %ymm0, %ymm6, %ymm14, %ymm3 vblendvps %ymm2, %ymm3, %ymm14, %ymm14 vblendvps %ymm0, %ymm6, %ymm13, %ymm0 vblendvps %ymm2, %ymm0, %ymm13, %ymm13 LBB6_121: ## %cif_mask_all2541 ## in Loop: Header=BB6_105 Depth=1 vinsertf128 $1, %xmm8, %ymm12, %ymm12 vmovmskps %ymm1, %esi testl %esi, %esi je LBB6_141 ## BB#122: ## %cif_mask_all2541 ## in Loop: Header=BB6_105 Depth=1 cmpl $255, %esi jne LBB6_124 ## BB#123: ## %packed_store_active___un_3C_uni_3E_vyi.exit6716 ## in Loop: Header=BB6_105 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -96(%rsp) ## 8-byte Spill jmp LBB6_141 .p2align 4, 0x90 LBB6_124: ## %cif_test_mixed2574 ## in Loop: Header=BB6_105 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload cltq leaq (%r11,%rax,4), %rbx xorl %r15d, %r15d testb $1, %sil je LBB6_126 ## BB#125: ## %store.i.i6655 ## in Loop: Header=BB6_105 Depth=1 vmovd %xmm12, (%rbx) movl $1, %r15d LBB6_126: ## %loopend.i.i6660 ## in Loop: Header=BB6_105 Depth=1 testb $2, %sil je LBB6_128 ## BB#127: ## %store.i.i6655.1 ## in Loop: Header=BB6_105 Depth=1 vpextrd $1, %xmm12, (%rbx,%r15,4) incl %r15d LBB6_128: ## %loopend.i.i6660.1 ## in Loop: Header=BB6_105 Depth=1 testb $4, %sil je LBB6_130 ## BB#129: ## %store.i.i6655.2 ## in Loop: Header=BB6_105 Depth=1 movslq %r15d, %r15 vpextrd $2, %xmm12, (%rbx,%r15,4) incl %r15d LBB6_130: ## %loopend.i.i6660.2 ## in Loop: Header=BB6_105 Depth=1 testb $8, %sil je LBB6_132 ## BB#131: ## %store.i.i6655.3 ## in Loop: Header=BB6_105 Depth=1 movslq %r15d, %r15 vpextrd $3, %xmm12, (%rbx,%r15,4) incl %r15d LBB6_132: ## %loopend.i.i6660.3 ## in Loop: Header=BB6_105 Depth=1 testb $16, %sil je LBB6_134 ## BB#133: ## %store.i.i6655.4 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r15d, %r15 vmovd %xmm0, (%rbx,%r15,4) incl %r15d LBB6_134: ## %loopend.i.i6660.4 ## in Loop: Header=BB6_105 Depth=1 testb $32, %sil je LBB6_136 ## BB#135: ## %store.i.i6655.5 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r15d, %r15 vpextrd $1, %xmm0, (%rbx,%r15,4) incl %r15d LBB6_136: ## %loopend.i.i6660.5 ## in Loop: Header=BB6_105 Depth=1 testb $64, %sil je LBB6_138 ## BB#137: ## %store.i.i6655.6 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r15d, %r15 vpextrd $2, %xmm0, (%rbx,%r15,4) incl %r15d LBB6_138: ## %loopend.i.i6660.6 ## in Loop: Header=BB6_105 Depth=1 testb %sil, %sil jns LBB6_140 ## BB#139: 
## %store.i.i6655.7 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r15d, %r15 vpextrd $3, %xmm0, (%rbx,%r15,4) incl %r15d LBB6_140: ## %loopend.i.i6660.7 ## in Loop: Header=BB6_105 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload addl %eax, %r15d movq %r15, -96(%rsp) ## 8-byte Spill LBB6_141: ## %cif_mask_all2650 ## in Loop: Header=BB6_105 Depth=1 vmovmskps %ymm15, %ebx testl %ebx, %ebx je LBB6_161 ## BB#142: ## %cif_mask_all2650 ## in Loop: Header=BB6_105 Depth=1 cmpl $255, %ebx jne LBB6_144 ## BB#143: ## %packed_store_active___un_3C_uni_3E_vyi.exit6550 ## in Loop: Header=BB6_105 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -104(%rsp) ## 8-byte Spill jmp LBB6_161 .p2align 4, 0x90 LBB6_144: ## %cif_test_mixed2683 ## in Loop: Header=BB6_105 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload cltq leaq (%r11,%rax,4), %rax xorl %esi, %esi testb $1, %bl je LBB6_146 ## BB#145: ## %store.i.i6489 ## in Loop: Header=BB6_105 Depth=1 vmovd %xmm12, (%rax) movl $1, %esi LBB6_146: ## %loopend.i.i6494 ## in Loop: Header=BB6_105 Depth=1 testb $2, %bl je LBB6_148 ## BB#147: ## %store.i.i6489.1 ## in Loop: Header=BB6_105 Depth=1 vpextrd $1, %xmm12, (%rax,%rsi,4) incl %esi LBB6_148: ## %loopend.i.i6494.1 ## in Loop: Header=BB6_105 Depth=1 testb $4, %bl je LBB6_150 ## BB#149: ## %store.i.i6489.2 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $2, %xmm12, (%rax,%rsi,4) incl %esi LBB6_150: ## %loopend.i.i6494.2 ## in Loop: Header=BB6_105 Depth=1 testb $8, %bl je LBB6_152 ## BB#151: ## %store.i.i6489.3 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $3, %xmm12, (%rax,%rsi,4) incl %esi LBB6_152: ## %loopend.i.i6494.3 ## in Loop: Header=BB6_105 Depth=1 testb $16, %bl je LBB6_154 ## BB#153: ## %store.i.i6489.4 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vmovd %xmm0, (%rax,%rsi,4) incl %esi LBB6_154: ## %loopend.i.i6494.4 ## in Loop: Header=BB6_105 Depth=1 testb $32, %bl je LBB6_156 ## BB#155: ## %store.i.i6489.5 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $1, %xmm0, (%rax,%rsi,4) incl %esi LBB6_156: ## %loopend.i.i6494.5 ## in Loop: Header=BB6_105 Depth=1 testb $64, %bl je LBB6_158 ## BB#157: ## %store.i.i6489.6 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $2, %xmm0, (%rax,%rsi,4) incl %esi LBB6_158: ## %loopend.i.i6494.6 ## in Loop: Header=BB6_105 Depth=1 testb %bl, %bl jns LBB6_160 ## BB#159: ## %store.i.i6489.7 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $3, %xmm0, (%rax,%rsi,4) incl %esi LBB6_160: ## %loopend.i.i6494.7 ## in Loop: Header=BB6_105 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload addl %eax, %esi movq %rsi, -104(%rsp) ## 8-byte Spill LBB6_161: ## %cif_mask_all2759 ## in Loop: Header=BB6_105 Depth=1 vmovmskps %ymm14, %ebx testl %ebx, %ebx je LBB6_181 ## BB#162: ## %cif_mask_all2759 ## in Loop: Header=BB6_105 Depth=1 cmpl $255, %ebx jne LBB6_164 ## BB#163: ## %packed_store_active___un_3C_uni_3E_vyi.exit6434 ## in Loop: Header=BB6_105 Depth=1 movslq %r14d, %r14 vextractf128 $1, %ymm12, 16(%r11,%r14,4) vmovups %xmm12, (%r11,%r14,4) addl $8, %r14d jmp LBB6_181 .p2align 4, 0x90 LBB6_164: ## %cif_test_mixed2792 ## in Loop: Header=BB6_105 Depth=1 movslq %r14d, %rax leaq (%r11,%rax,4), %rax xorl %esi, %esi testb $1, %bl je LBB6_166 ## BB#165: ## %store.i.i6397 ## in Loop: 
Header=BB6_105 Depth=1 vmovd %xmm12, (%rax) movl $1, %esi LBB6_166: ## %loopend.i.i6402 ## in Loop: Header=BB6_105 Depth=1 testb $2, %bl je LBB6_168 ## BB#167: ## %store.i.i6397.1 ## in Loop: Header=BB6_105 Depth=1 vpextrd $1, %xmm12, (%rax,%rsi,4) incl %esi LBB6_168: ## %loopend.i.i6402.1 ## in Loop: Header=BB6_105 Depth=1 testb $4, %bl je LBB6_170 ## BB#169: ## %store.i.i6397.2 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $2, %xmm12, (%rax,%rsi,4) incl %esi LBB6_170: ## %loopend.i.i6402.2 ## in Loop: Header=BB6_105 Depth=1 testb $8, %bl je LBB6_172 ## BB#171: ## %store.i.i6397.3 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $3, %xmm12, (%rax,%rsi,4) incl %esi LBB6_172: ## %loopend.i.i6402.3 ## in Loop: Header=BB6_105 Depth=1 testb $16, %bl je LBB6_174 ## BB#173: ## %store.i.i6397.4 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vmovd %xmm0, (%rax,%rsi,4) incl %esi LBB6_174: ## %loopend.i.i6402.4 ## in Loop: Header=BB6_105 Depth=1 testb $32, %bl je LBB6_176 ## BB#175: ## %store.i.i6397.5 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $1, %xmm0, (%rax,%rsi,4) incl %esi LBB6_176: ## %loopend.i.i6402.5 ## in Loop: Header=BB6_105 Depth=1 testb $64, %bl je LBB6_178 ## BB#177: ## %store.i.i6397.6 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $2, %xmm0, (%rax,%rsi,4) incl %esi LBB6_178: ## %loopend.i.i6402.6 ## in Loop: Header=BB6_105 Depth=1 testb %bl, %bl jns LBB6_180 ## BB#179: ## %store.i.i6397.7 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $3, %xmm0, (%rax,%rsi,4) incl %esi LBB6_180: ## %loopend.i.i6402.7 ## in Loop: Header=BB6_105 Depth=1 addl %r14d, %esi movl %esi, %r14d LBB6_181: ## %cif_mask_all2868 ## in Loop: Header=BB6_105 Depth=1 vmovmskps %ymm13, %ebx cmpl $255, %ebx je LBB6_200 ## BB#182: ## %cif_mask_all2868 ## in Loop: Header=BB6_105 Depth=1 testl %ebx, %ebx je LBB6_201 ## BB#183: ## %cif_test_mixed2901 ## in Loop: Header=BB6_105 Depth=1 movq -112(%rsp), %rax ## 8-byte Reload cltq leaq (%r11,%rax,4), %rax xorl %esi, %esi testb $1, %bl je LBB6_185 ## BB#184: ## %store.i.i6305 ## in Loop: Header=BB6_105 Depth=1 vmovd %xmm12, (%rax) movl $1, %esi LBB6_185: ## %loopend.i.i6310 ## in Loop: Header=BB6_105 Depth=1 testb $2, %bl je LBB6_187 ## BB#186: ## %store.i.i6305.1 ## in Loop: Header=BB6_105 Depth=1 vpextrd $1, %xmm12, (%rax,%rsi,4) incl %esi LBB6_187: ## %loopend.i.i6310.1 ## in Loop: Header=BB6_105 Depth=1 testb $4, %bl je LBB6_189 ## BB#188: ## %store.i.i6305.2 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $2, %xmm12, (%rax,%rsi,4) incl %esi LBB6_189: ## %loopend.i.i6310.2 ## in Loop: Header=BB6_105 Depth=1 testb $8, %bl je LBB6_191 ## BB#190: ## %store.i.i6305.3 ## in Loop: Header=BB6_105 Depth=1 movslq %esi, %rsi vpextrd $3, %xmm12, (%rax,%rsi,4) incl %esi LBB6_191: ## %loopend.i.i6310.3 ## in Loop: Header=BB6_105 Depth=1 testb $16, %bl je LBB6_193 ## BB#192: ## %store.i.i6305.4 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vmovd %xmm0, (%rax,%rsi,4) incl %esi LBB6_193: ## %loopend.i.i6310.4 ## in Loop: Header=BB6_105 Depth=1 testb $32, %bl je LBB6_195 ## BB#194: ## %store.i.i6305.5 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $1, %xmm0, (%rax,%rsi,4) incl %esi LBB6_195: ## %loopend.i.i6310.5 ## in Loop: Header=BB6_105 Depth=1 testb $64, %bl je LBB6_197 ## BB#196: ## 
%store.i.i6305.6 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $2, %xmm0, (%rax,%rsi,4) incl %esi LBB6_197: ## %loopend.i.i6310.6 ## in Loop: Header=BB6_105 Depth=1 testb %bl, %bl jns LBB6_199 ## BB#198: ## %store.i.i6305.7 ## in Loop: Header=BB6_105 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %esi, %rsi vpextrd $3, %xmm0, (%rax,%rsi,4) incl %esi LBB6_199: ## %loopend.i.i6310.7 ## in Loop: Header=BB6_105 Depth=1 movq -112(%rsp), %rax ## 8-byte Reload addl %eax, %esi movq %rsi, -112(%rsp) ## 8-byte Spill jmp LBB6_201 .p2align 4, 0x90 LBB6_200: ## %packed_store_active___un_3C_uni_3E_vyi.exit6342 ## in Loop: Header=BB6_105 Depth=1 movq -112(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r11,%rax,4) vmovups %xmm12, (%r11,%rax,4) addl $8, %eax movq %rax, -112(%rsp) ## 8-byte Spill LBB6_201: ## %cif_done2870 ## in Loop: Header=BB6_105 Depth=1 addl $8, %r8d addl $32, %r9d cmpl 64(%rsp), %r8d ## 4-byte Folded Reload jl LBB6_105 LBB6_202: ## %partial_inner_all_outer2037 movl 264(%rsp), %eax cmpl %eax, %r8d jge LBB6_206 ## BB#203: ## %partial_inner_only2981 movq %r14, %r15 movq %r11, %r14 vmovd %r8d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI6_4(%rip), %xmm0, %xmm1 vpaddd LCPI6_5(%rip), %xmm0, %xmm10 vmovd %eax, %xmm0 vpshufd $0, %xmm0, %xmm8 ## xmm8 = xmm0[0,0,0,0] vpcmpgtd %xmm10, %xmm8, %xmm6 vmovdqu %ymm1, 64(%rsp) ## 32-byte Spill vpcmpgtd %xmm1, %xmm8, %xmm3 vinsertf128 $1, %xmm6, %ymm3, %ymm0 shll $2, %r8d movslq %r8d, %rax vmaskmovps (%r12,%rax), %ymm0, %ymm2 vpslld $2, %xmm2, %xmm1 vmovups %ymm2, -32(%rsp) ## 32-byte Spill vextractf128 $1, %ymm2, %xmm2 vmovaps %xmm2, -64(%rsp) ## 16-byte Spill vpslld $2, %xmm2, %xmm2 vinsertf128 $1, %xmm2, %ymm1, %ymm1 vxorps %ymm2, %ymm2, %ymm2 vblendvps %ymm0, %ymm1, %ymm2, %ymm1 vpextrq $1, %xmm1, %rax movslq %eax, %r8 sarq $32, %rax vmovq %xmm1, %rbx movslq %ebx, %r9 sarq $32, %rbx vextractf128 $1, %ymm1, %xmm1 vpextrq $1, %xmm1, %rbp movslq %ebp, %r11 sarq $32, %rbp movq %rdi, %rcx vmovq %xmm1, %rdi movslq %edi, %rsi sarq $32, %rdi vmovss (%r10,%rsi), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r10,%r11), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r10,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovss (%r10,%r9), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rbx), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%r10,%r8), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%r10,%rax), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vinsertf128 $1, %xmm1, %ymm5, %ymm1 movq 296(%rsp), %rdx vmovss (%rdx,%rsi), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rdi), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rdx,%r11), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rdx,%rbp), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vmovss (%rdx,%r9), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbx), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rdx,%r8), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rdx,%rax), %xmm7, %xmm7 ## xmm7 = xmm7[0,1,2],mem[0] vinsertf128 $1, %xmm5, %ymm7, %ymm12 vsubps %ymm12, %ymm2, %ymm2 vbroadcastss (%rcx), %ymm5 vsubps %ymm5, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm5 vandps %ymm0, %ymm5, %ymm5 vextractf128 $1, %ymm5, %xmm7 vpcmpeqd %xmm6, %xmm7, %xmm7 movq 272(%rsp), %rcx vmovss (%rcx,%rsi), %xmm6 ## xmm6 
= mem[0],zero,zero,zero vinsertps $16, (%rcx,%rdi), %xmm6, %xmm6 ## xmm6 = xmm6[0],mem[0],xmm6[2,3] vinsertps $32, (%rcx,%r11), %xmm6, %xmm6 ## xmm6 = xmm6[0,1],mem[0],xmm6[3] vinsertps $48, (%rcx,%rbp), %xmm6, %xmm6 ## xmm6 = xmm6[0,1,2],mem[0] vpcmpeqd %xmm3, %xmm5, %xmm5 vmovss (%rcx,%r9), %xmm3 ## xmm3 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rbx), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertps $32, (%rcx,%r8), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] vinsertps $48, (%rcx,%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] vinsertf128 $1, %xmm7, %ymm5, %ymm7 movq 280(%rsp), %rcx movq %rcx, %rdx vmovss (%rdx,%rsi), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rdi), %xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rdx,%r11), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rdx,%rbp), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vmovmskps %ymm7, %ecx vmovss (%rdx,%r9), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rdx,%rbx), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rdx,%r8), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rdx,%rax), %xmm7, %xmm15 ## xmm15 = xmm7[0,1,2],mem[0] cmpl $255, %ecx jne LBB6_210 ## BB#204: vcmpnltps %ymm2, %ymm9, %ymm11 movq -72(%rsp), %rcx ## 8-byte Reload jmp LBB6_211 LBB6_205: movq 320(%rsp), %rax movl 312(%rsp), %edi movl -88(%rsp), %ebp ## 4-byte Reload movl -84(%rsp), %ebx ## 4-byte Reload jmp LBB6_407 LBB6_206: movq 320(%rsp), %rax movl 312(%rsp), %edi movl -88(%rsp), %ebp ## 4-byte Reload movl -84(%rsp), %r8d ## 4-byte Reload jmp LBB6_503 LBB6_207: ## %eval_11012 vcmpnltps %ymm2, %ymm9, %ymm7 movq -72(%rsp), %rcx ## 8-byte Reload vbroadcastss (%rcx), %ymm9 vsubps %ymm1, %ymm9, %ymm9 vcmpnltps %ymm2, %ymm9, %ymm9 vandps %ymm7, %ymm9, %ymm7 vandps %ymm0, %ymm7, %ymm11 LBB6_208: ## %logical_op_done1013 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 4(%rax), %ymm7 vsubps %ymm7, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm7 vandps %ymm0, %ymm7, %ymm7 vextractf128 $1, %ymm7, %xmm4 vextractf128 $1, %ymm0, %xmm13 vpcmpeqd %xmm13, %xmm4, %xmm4 vpcmpeqd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax movl 312(%rsp), %edi movl -88(%rsp), %ebp ## 4-byte Reload movl -84(%rsp), %ebx ## 4-byte Reload movq %r14, %r8 jne LBB6_213 ## BB#209: vcmpnltps %ymm2, %ymm9, %ymm14 jmp LBB6_214 LBB6_210: ## %eval_13037 vcmpnltps %ymm2, %ymm9, %ymm7 movq -72(%rsp), %rcx ## 8-byte Reload vbroadcastss (%rcx), %ymm9 vsubps %ymm1, %ymm9, %ymm9 vcmpnltps %ymm2, %ymm9, %ymm9 vandps %ymm7, %ymm9, %ymm7 vandps %ymm0, %ymm7, %ymm11 LBB6_211: ## %logical_op_done3038 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 4(%rax), %ymm7 vsubps %ymm7, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm7 vandps %ymm0, %ymm7, %ymm7 vextractf128 $1, %ymm7, %xmm4 vextractf128 $1, %ymm0, %xmm13 vpcmpeqd %xmm13, %xmm4, %xmm4 vpcmpeqd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax movl 312(%rsp), %edi movl -88(%rsp), %ebp ## 4-byte Reload movl -84(%rsp), %r8d ## 4-byte Reload movq %r14, %rbx jne LBB6_216 ## BB#212: vcmpnltps %ymm2, %ymm9, %ymm14 jmp LBB6_217 LBB6_213: ## %eval_11042 vcmpnltps %ymm2, %ymm9, %ymm4 vbroadcastss 4(%rcx), %ymm7 vsubps %ymm1, %ymm7, %ymm7 vcmpnltps %ymm2, %ymm7, %ymm7 vandps %ymm4, %ymm7, %ymm4 vandps %ymm0, %ymm4, %ymm14 LBB6_214: ## %logical_op_done1043 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 8(%rax), %ymm4 vsubps %ymm4, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm4 vandps 
%ymm0, %ymm4, %ymm4 vextractf128 $1, %ymm4, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm0, %xmm4, %xmm4 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax jne LBB6_219 ## BB#215: vcmpnltps %ymm2, %ymm9, %ymm9 jmp LBB6_220 LBB6_216: ## %eval_13067 vcmpnltps %ymm2, %ymm9, %ymm4 vbroadcastss 4(%rcx), %ymm7 vsubps %ymm1, %ymm7, %ymm7 vcmpnltps %ymm2, %ymm7, %ymm7 vandps %ymm4, %ymm7, %ymm4 vandps %ymm0, %ymm4, %ymm14 LBB6_217: ## %logical_op_done3068 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 8(%rax), %ymm4 vsubps %ymm4, %ymm1, %ymm9 vcmpltps %ymm2, %ymm9, %ymm4 vandps %ymm0, %ymm4, %ymm4 vextractf128 $1, %ymm4, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm0, %xmm4, %xmm4 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax jne LBB6_222 ## BB#218: vcmpnltps %ymm2, %ymm9, %ymm9 jmp LBB6_223 LBB6_219: ## %eval_11072 vcmpnltps %ymm2, %ymm9, %ymm4 vbroadcastss 8(%rcx), %ymm7 vsubps %ymm1, %ymm7, %ymm7 vcmpnltps %ymm2, %ymm7, %ymm7 vandps %ymm4, %ymm7, %ymm4 vandps %ymm0, %ymm4, %ymm9 LBB6_220: ## %logical_op_done1073 vinsertf128 $1, %xmm6, %ymm3, %ymm6 vinsertf128 $1, %xmm5, %ymm15, %ymm3 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 12(%rax), %ymm4 vsubps %ymm4, %ymm1, %ymm5 vcmpltps %ymm2, %ymm5, %ymm4 vandps %ymm0, %ymm4, %ymm4 vextractf128 $1, %ymm4, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm0, %xmm4, %xmm4 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax jne LBB6_225 ## BB#221: vcmpnltps %ymm2, %ymm5, %ymm2 jmp LBB6_226 LBB6_222: ## %eval_13097 vcmpnltps %ymm2, %ymm9, %ymm4 vbroadcastss 8(%rcx), %ymm7 vsubps %ymm1, %ymm7, %ymm7 vcmpnltps %ymm2, %ymm7, %ymm7 vandps %ymm4, %ymm7, %ymm4 vandps %ymm0, %ymm4, %ymm9 LBB6_223: ## %logical_op_done3098 vinsertf128 $1, %xmm6, %ymm3, %ymm6 vinsertf128 $1, %xmm5, %ymm15, %ymm3 movq -80(%rsp), %rax ## 8-byte Reload vbroadcastss 12(%rax), %ymm4 vsubps %ymm4, %ymm1, %ymm5 vcmpltps %ymm2, %ymm5, %ymm4 vandps %ymm0, %ymm4, %ymm4 vextractf128 $1, %ymm4, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm0, %xmm4, %xmm4 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vmovmskps %ymm4, %eax cmpl $255, %eax jne LBB6_230 ## BB#224: vcmpnltps %ymm2, %ymm5, %ymm2 jmp LBB6_231 LBB6_225: ## %eval_11102 vcmpnltps %ymm2, %ymm5, %ymm4 vbroadcastss 12(%rcx), %ymm5 vsubps %ymm1, %ymm5, %ymm5 vcmpnltps %ymm2, %ymm5, %ymm2 vandps %ymm4, %ymm2, %ymm2 vandps %ymm0, %ymm2, %ymm2 LBB6_226: ## %logical_op_done1103 vmovups 64(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm10, %ymm4, %ymm10 vinsertf128 $1, %xmm8, %ymm8, %ymm8 vpermilps $0, (%rsp), %xmm4 ## 16-byte Folded Reload ## xmm4 = mem[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmulps %ymm1, %ymm4, %ymm4 vpermilps $0, 16(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vpermilps $0, 32(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm1, %ymm5, %ymm1 vpermilps $0, 48(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm3, %ymm5, %ymm3 vaddps %ymm1, %ymm3, %ymm1 vandps LCPI6_3(%rip), %ymm4, %ymm3 vmovmskps %ymm0, %eax cmpl $255, %eax jne LBB6_235 ## BB#227: ## %cif_mask_all1149 vcmpnleps %ymm12, %ymm3, %ymm0 vmovmskps %ymm0, %ecx testl %ecx, %ecx je LBB6_269 ## BB#228: ## %cif_mask_all1149 cmpl $255, %ecx jne LBB6_268 ## BB#229: ## %cif_test_all1158 vxorps %ymm0, %ymm0, %ymm0 vcmpnleps %ymm0, %ymm4, %ymm3 vandps %ymm11, %ymm3, 
%ymm11 vcmpleps %ymm0, %ymm4, %ymm0 vandps %ymm14, %ymm0, %ymm14 vandps %ymm9, %ymm3, %ymm9 vandps %ymm2, %ymm0, %ymm2 jmp LBB6_269 LBB6_230: ## %eval_13127 vcmpnltps %ymm2, %ymm5, %ymm4 vbroadcastss 12(%rcx), %ymm5 vsubps %ymm1, %ymm5, %ymm5 vcmpnltps %ymm2, %ymm5, %ymm2 vandps %ymm4, %ymm2, %ymm2 vandps %ymm0, %ymm2, %ymm2 LBB6_231: ## %logical_op_done3128 vmovups 64(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm10, %ymm4, %ymm10 vinsertf128 $1, %xmm8, %ymm8, %ymm8 vpermilps $0, (%rsp), %xmm4 ## 16-byte Folded Reload ## xmm4 = mem[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmulps %ymm1, %ymm4, %ymm4 vpermilps $0, 16(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vpermilps $0, 32(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm1, %ymm5, %ymm1 vpermilps $0, 48(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm3, %ymm5, %ymm3 vaddps %ymm1, %ymm3, %ymm1 vandps LCPI6_3(%rip), %ymm4, %ymm3 vmovmskps %ymm0, %edx cmpl $255, %edx jne LBB6_245 ## BB#232: ## %cif_mask_all3174 vcmpnleps %ymm12, %ymm3, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_273 ## BB#233: ## %cif_mask_all3174 cmpl $255, %eax jne LBB6_272 ## BB#234: ## %cif_test_all3183 vxorps %ymm0, %ymm0, %ymm0 vcmpnleps %ymm0, %ymm4, %ymm3 vandps %ymm11, %ymm3, %ymm11 vcmpleps %ymm0, %ymm4, %ymm0 vandps %ymm14, %ymm0, %ymm14 vandps %ymm9, %ymm3, %ymm9 vandps %ymm2, %ymm0, %ymm2 jmp LBB6_273 LBB6_235: ## %cif_mask_mixed1150 vcmpnleps %ymm12, %ymm3, %ymm5 vxorps %ymm3, %ymm3, %ymm3 vblendvps %ymm5, %ymm0, %ymm3, %ymm5 vmovmskps %ymm5, %ecx testl %ecx, %ecx je LBB6_237 ## BB#236: ## %safe_if_run_true1253 vcmpnleps %ymm3, %ymm4, %ymm4 vblendvps %ymm4, %ymm11, %ymm3, %ymm6 vblendvps %ymm5, %ymm6, %ymm11, %ymm11 vblendvps %ymm4, %ymm3, %ymm14, %ymm6 vblendvps %ymm5, %ymm6, %ymm14, %ymm14 vblendvps %ymm4, %ymm9, %ymm3, %ymm6 vblendvps %ymm5, %ymm6, %ymm9, %ymm9 vblendvps %ymm4, %ymm3, %ymm2, %ymm4 vblendvps %ymm5, %ymm4, %ymm2, %ymm2 LBB6_237: ## %cif_mask_mixed1310 vmovdqu -32(%rsp), %ymm5 ## 32-byte Reload vandps LCPI6_3(%rip), %ymm1, %ymm4 vcmpnleps %ymm12, %ymm4, %ymm4 vblendvps %ymm4, %ymm0, %ymm3, %ymm0 vmovmskps %ymm0, %ecx testl %ecx, %ecx je LBB6_239 ## BB#238: ## %safe_if_run_true1413 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm1, %ymm1 vblendvps %ymm1, %ymm11, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm11, %ymm11 vblendvps %ymm1, %ymm14, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm14, %ymm14 vblendvps %ymm1, %ymm3, %ymm9, %ymm4 vblendvps %ymm0, %ymm4, %ymm9, %ymm9 vblendvps %ymm1, %ymm3, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 LBB6_239: ## %cif_mask_mixed1467 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm11, %ymm1, %ymm0 vmovmskps %ymm0, %edx testl %edx, %edx je LBB6_266 ## BB#240: ## %loop.i.i7256.preheader movq -96(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r8,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %dl je LBB6_242 ## BB#241: ## %store.i.i7260 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_242: ## %loopend.i.i7265 testb $2, %dl je LBB6_244 ## BB#243: ## %store.i.i7260.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_244: ## %loopend.i.i7265.1 vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jmp LBB6_289 LBB6_245: ## %cif_mask_mixed3175 
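## NOTE (added annotation, not compiler output): LBB6_245 is the
## mixed-execution-mask counterpart of cif_mask_all3174 above. Where
## the all-on paths narrow the four candidate masks with vandps, this
## path merges every update under the live mask with vblendvps so that
## inactive lanes keep their previous values.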
vcmpnleps %ymm12, %ymm3, %ymm5 vxorps %ymm3, %ymm3, %ymm3 vblendvps %ymm5, %ymm0, %ymm3, %ymm5 vmovmskps %ymm5, %eax testl %eax, %eax je LBB6_247 ## BB#246: ## %safe_if_run_true3278 vcmpnleps %ymm3, %ymm4, %ymm4 vblendvps %ymm4, %ymm11, %ymm3, %ymm6 vblendvps %ymm5, %ymm6, %ymm11, %ymm11 vblendvps %ymm4, %ymm3, %ymm14, %ymm6 vblendvps %ymm5, %ymm6, %ymm14, %ymm14 vblendvps %ymm4, %ymm9, %ymm3, %ymm6 vblendvps %ymm5, %ymm6, %ymm9, %ymm9 vblendvps %ymm4, %ymm3, %ymm2, %ymm4 vblendvps %ymm5, %ymm4, %ymm2, %ymm2 LBB6_247: ## %cif_mask_mixed3335 vmovdqu -32(%rsp), %ymm5 ## 32-byte Reload vandps LCPI6_3(%rip), %ymm1, %ymm4 vcmpnleps %ymm12, %ymm4, %ymm4 vblendvps %ymm4, %ymm0, %ymm3, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_249 ## BB#248: ## %safe_if_run_true3438 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm1, %ymm1 vblendvps %ymm1, %ymm11, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm11, %ymm11 vblendvps %ymm1, %ymm14, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm14, %ymm14 vblendvps %ymm1, %ymm3, %ymm9, %ymm4 vblendvps %ymm0, %ymm4, %ymm9, %ymm9 vblendvps %ymm1, %ymm3, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 LBB6_249: ## %cif_mask_mixed3492 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm11, %ymm1, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_267 ## BB#250: ## %loop.i.i6146.preheader movq -96(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%rbx,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %al je LBB6_252 ## BB#251: ## %store.i.i6150 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_252: ## %loopend.i.i6155 testb $2, %al je LBB6_254 ## BB#253: ## %store.i.i6150.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_254: ## %loopend.i.i6155.1 vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload testb $4, %al je LBB6_256 ## BB#255: ## %store.i.i6150.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_256: ## %loopend.i.i6155.2 testb $8, %al je LBB6_258 ## BB#257: ## %store.i.i6150.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_258: ## %loopend.i.i6155.3 testb $16, %al je LBB6_260 ## BB#259: ## %store.i.i6150.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_260: ## %loopend.i.i6155.4 testb $32, %al je LBB6_262 ## BB#261: ## %store.i.i6150.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_262: ## %loopend.i.i6155.5 testb $64, %al je LBB6_264 ## BB#263: ## %store.i.i6150.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_264: ## %loopend.i.i6155.6 testb %al, %al jns LBB6_385 ## BB#265: ## %store.i.i6150.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) jmp LBB6_384 LBB6_266: vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jmp LBB6_305 LBB6_267: vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jmp LBB6_389 LBB6_268: ## %cif_test_mixed1198 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm4, %ymm4 vblendvps %ymm4, %ymm11, %ymm3, %ymm5 vblendvps %ymm0, %ymm5, %ymm11, %ymm11 vblendvps %ymm4, %ymm3, %ymm14, %ymm5 vblendvps %ymm0, %ymm5, %ymm14, %ymm14 vblendvps %ymm4, %ymm9, %ymm3, %ymm5 vblendvps %ymm0, %ymm5, %ymm9, %ymm9 vblendvps %ymm4, %ymm3, %ymm2, %ymm3 vblendvps %ymm0, %ymm3, %ymm2, %ymm2 LBB6_269: ## %cif_mask_all1309 vmovdqu -32(%rsp), %ymm5 ## 32-byte Reload vandps LCPI6_3(%rip), %ymm1, %ymm0 vcmpnleps %ymm12, %ymm0, %ymm0 vmovmskps %ymm0, %ecx testl %ecx, %ecx je LBB6_277 ## BB#270: ## %cif_mask_all1309 cmpl $255, %ecx jne LBB6_276 ## BB#271: ## %cif_test_all1318 
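## NOTE (added annotation, not compiler output): BB#271 below is the
## all-lanes-active test: a sign test of %ymm1 against zero
## (vcmpnleps / vcmpleps) splits the lanes, and the four running masks
## %ymm11, %ymm14, %ymm9 and %ymm2 are narrowed with vandps
## accordingly before control rejoins at LBB6_277.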
vxorps %ymm0, %ymm0, %ymm0 vcmpnleps %ymm0, %ymm1, %ymm3 vandps %ymm11, %ymm3, %ymm11 vandps %ymm14, %ymm3, %ymm14 vcmpleps %ymm0, %ymm1, %ymm0 vandps %ymm9, %ymm0, %ymm9 vandps %ymm2, %ymm0, %ymm2 jmp LBB6_277 LBB6_272: ## %cif_test_mixed3223 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm4, %ymm4 vblendvps %ymm4, %ymm11, %ymm3, %ymm5 vblendvps %ymm0, %ymm5, %ymm11, %ymm11 vblendvps %ymm4, %ymm3, %ymm14, %ymm5 vblendvps %ymm0, %ymm5, %ymm14, %ymm14 vblendvps %ymm4, %ymm9, %ymm3, %ymm5 vblendvps %ymm0, %ymm5, %ymm9, %ymm9 vblendvps %ymm4, %ymm3, %ymm2, %ymm3 vblendvps %ymm0, %ymm3, %ymm2, %ymm2 LBB6_273: ## %cif_mask_all3334 vmovdqu -32(%rsp), %ymm5 ## 32-byte Reload vandps LCPI6_3(%rip), %ymm1, %ymm0 vcmpnleps %ymm12, %ymm0, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_281 ## BB#274: ## %cif_mask_all3334 cmpl $255, %eax jne LBB6_280 ## BB#275: ## %cif_test_all3343 vxorps %ymm0, %ymm0, %ymm0 vcmpnleps %ymm0, %ymm1, %ymm3 vandps %ymm11, %ymm3, %ymm11 vandps %ymm14, %ymm3, %ymm14 vcmpleps %ymm0, %ymm1, %ymm0 vandps %ymm9, %ymm0, %ymm9 vandps %ymm2, %ymm0, %ymm2 jmp LBB6_281 LBB6_276: ## %cif_test_mixed1358 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm1, %ymm1 vblendvps %ymm1, %ymm11, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm11, %ymm11 vblendvps %ymm1, %ymm14, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm14, %ymm14 vblendvps %ymm1, %ymm3, %ymm9, %ymm4 vblendvps %ymm0, %ymm4, %ymm9, %ymm9 vblendvps %ymm1, %ymm3, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 LBB6_277: ## %cif_mask_all1466 vmovmskps %ymm11, %edx testl %edx, %edx je LBB6_284 ## BB#278: ## %cif_mask_all1466 cmpl $255, %edx vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jne LBB6_285 ## BB#279: ## %all_on.i.i7308 movq -96(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx vextractf128 $1, %ymm5, 16(%r8,%rcx,4) vmovdqu %xmm5, (%r8,%rcx,4) movl $8, %ecx jmp LBB6_301 LBB6_280: ## %cif_test_mixed3383 vxorps %ymm3, %ymm3, %ymm3 vcmpnleps %ymm3, %ymm1, %ymm1 vblendvps %ymm1, %ymm11, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm11, %ymm11 vblendvps %ymm1, %ymm14, %ymm3, %ymm4 vblendvps %ymm0, %ymm4, %ymm14, %ymm14 vblendvps %ymm1, %ymm3, %ymm9, %ymm4 vblendvps %ymm0, %ymm4, %ymm9, %ymm9 vblendvps %ymm1, %ymm3, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 LBB6_281: ## %cif_mask_all3491 vmovmskps %ymm11, %esi testl %esi, %esi je LBB6_367 ## BB#282: ## %cif_mask_all3491 cmpl $255, %esi vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jne LBB6_368 ## BB#283: ## %all_on.i.i6197 movq -96(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm5, 16(%rbx,%rax,4) vmovdqu %xmm5, (%rbx,%rax,4) movl $8, %ecx jmp LBB6_385 LBB6_284: vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jmp LBB6_302 LBB6_285: ## %cif_test_mixed1499 movq -96(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r8,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %dl je LBB6_287 ## BB#286: ## %store.i.i7288 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_287: ## %loopend.i.i7293 testb $2, %dl je LBB6_289 ## BB#288: ## %store.i.i7288.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_289: ## %loopend.i.i7293.1 testb $4, %dl je LBB6_291 ## BB#290: ## %store.i.i7288.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_291: ## %loopend.i.i7293.2 testb $8, %dl je LBB6_293 ## BB#292: ## %store.i.i7288.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_293: ## %loopend.i.i7293.3 testb $16, %dl je LBB6_295 ## BB#294: ## %store.i.i7288.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_295: ## %loopend.i.i7293.4 testb $32, %dl je LBB6_297 ## BB#296: ## %store.i.i7288.5 
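## [annotation] The testb $1/$2/.../$64 ladders in this function emulate
## a left-packing (compressing) masked store, which AVX1 does not have
## as an instruction: each set bit of the mask byte appends its lane
## (vmovd/vpextrd from the low or high 128-bit half) at the next free
## slot, %ecx counts the values written, and the count is added back
## into the output index. The eighth lane is tested through the sign bit
## (testb %dl, %dl / jns), and the all-on case collapses to one vmovdqu
## plus one vextractf128 store of the whole vector.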
movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_297: ## %loopend.i.i7293.5 testb $64, %dl je LBB6_299 ## BB#298: ## %store.i.i7288.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_299: ## %loopend.i.i7293.6 testb %dl, %dl jns LBB6_301 ## BB#300: ## %store.i.i7288.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_301: ## %cif_done1468 movq -96(%rsp), %rdx ## 8-byte Reload addl %edx, %ecx movq %rcx, -96(%rsp) ## 8-byte Spill cmpl $255, %eax jne LBB6_305 LBB6_302: ## %cif_mask_all1575 vmovmskps %ymm14, %edx testl %edx, %edx je LBB6_323 ## BB#303: ## %cif_mask_all1575 cmpl $255, %edx jne LBB6_306 ## BB#304: ## %all_on.i.i7216 movq -104(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx vextractf128 $1, %ymm5, 16(%r8,%rcx,4) vmovdqu %xmm5, (%r8,%rcx,4) movl $8, %ecx jmp LBB6_322 LBB6_305: ## %cif_mask_mixed1576 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm14, %ymm1, %ymm0 vmovmskps %ymm0, %edx testl %edx, %edx je LBB6_326 LBB6_306: ## %cif_test_mixed1608 movq -104(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%r8,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %dl je LBB6_308 ## BB#307: ## %store.i.i7196 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_308: ## %loopend.i.i7201 testb $2, %dl je LBB6_310 ## BB#309: ## %store.i.i7196.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_310: ## %loopend.i.i7201.1 testb $4, %dl je LBB6_312 ## BB#311: ## %store.i.i7196.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_312: ## %loopend.i.i7201.2 testb $8, %dl je LBB6_314 ## BB#313: ## %store.i.i7196.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_314: ## %loopend.i.i7201.3 testb $16, %dl je LBB6_316 ## BB#315: ## %store.i.i7196.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_316: ## %loopend.i.i7201.4 testb $32, %dl je LBB6_318 ## BB#317: ## %store.i.i7196.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_318: ## %loopend.i.i7201.5 testb $64, %dl je LBB6_320 ## BB#319: ## %store.i.i7196.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_320: ## %loopend.i.i7201.6 testb %dl, %dl jns LBB6_322 ## BB#321: ## %store.i.i7196.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_322: ## %cif_done1577 movq -104(%rsp), %rdx ## 8-byte Reload addl %edx, %ecx movq %rcx, -104(%rsp) ## 8-byte Spill cmpl $255, %eax jne LBB6_326 LBB6_323: ## %cif_mask_all1684 vmovmskps %ymm9, %edx testl %edx, %edx je LBB6_345 ## BB#324: ## %cif_mask_all1684 movslq %r13d, %rcx cmpl $255, %edx jne LBB6_328 ## BB#325: ## %all_on.i.i7074 vextractf128 $1, %ymm5, 16(%r8,%rcx,4) vmovdqu %xmm5, (%r8,%rcx,4) movl $8, %ecx jmp LBB6_344 LBB6_326: ## %cif_mask_mixed1685 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm9, %ymm1, %ymm0 vmovmskps %ymm0, %edx testl %edx, %edx je LBB6_348 ## BB#327: ## %loop.i.i6998.preheader movslq %r13d, %rcx LBB6_328: ## %cif_test_mixed1717 leaq (%r8,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %dl je LBB6_330 ## BB#329: ## %store.i.i7030 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_330: ## %loopend.i.i7035 testb $2, %dl je LBB6_332 ## BB#331: ## %store.i.i7030.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_332: ## %loopend.i.i7035.1 testb $4, %dl je LBB6_334 ## BB#333: ## 
%store.i.i7030.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_334: ## %loopend.i.i7035.2 testb $8, %dl je LBB6_336 ## BB#335: ## %store.i.i7030.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_336: ## %loopend.i.i7035.3 testb $16, %dl je LBB6_338 ## BB#337: ## %store.i.i7030.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_338: ## %loopend.i.i7035.4 testb $32, %dl je LBB6_340 ## BB#339: ## %store.i.i7030.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_340: ## %loopend.i.i7035.5 testb $64, %dl je LBB6_342 ## BB#341: ## %store.i.i7030.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_342: ## %loopend.i.i7035.6 testb %dl, %dl jns LBB6_344 ## BB#343: ## %store.i.i7030.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_344: ## %cif_done1686 addl %r13d, %ecx movl %ecx, %r13d cmpl $255, %eax jne LBB6_348 LBB6_345: ## %cif_mask_all1793 vmovmskps %ymm2, %ecx testl %ecx, %ecx je LBB6_406 ## BB#346: ## %cif_mask_all1793 cmpl $255, %ecx jne LBB6_349 ## BB#347: ## %packed_store_active___un_3C_uni_3E_vyi.exit6925 movq -112(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rax vextractf128 $1, %ymm5, 16(%r8,%rax,4) vmovdqu %xmm5, (%r8,%rax,4) addl $8, %ecx movq %rcx, -112(%rsp) ## 8-byte Spill jmp LBB6_406 LBB6_348: ## %cif_mask_mixed1794 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm2, %ymm1, %ymm0 vmovmskps %ymm0, %ecx testl %ecx, %ecx je LBB6_406 LBB6_349: ## %loop.i.i6832.preheader movq -112(%rsp), %rax ## 8-byte Reload cltq leaq (%r8,%rax,4), %rdx xorl %eax, %eax testb $1, %cl je LBB6_351 ## BB#350: ## %store.i.i6836 vmovd %xmm5, (%rdx) movl $1, %eax LBB6_351: ## %loopend.i.i6841 testb $2, %cl je LBB6_353 ## BB#352: ## %store.i.i6836.1 vpextrd $1, %xmm5, (%rdx,%rax,4) incl %eax LBB6_353: ## %loopend.i.i6841.1 testb $4, %cl je LBB6_355 ## BB#354: ## %store.i.i6836.2 cltq vpextrd $2, %xmm5, (%rdx,%rax,4) incl %eax LBB6_355: ## %loopend.i.i6841.2 testb $8, %cl je LBB6_357 ## BB#356: ## %store.i.i6836.3 cltq vpextrd $3, %xmm5, (%rdx,%rax,4) incl %eax LBB6_357: ## %loopend.i.i6841.3 testb $16, %cl je LBB6_359 ## BB#358: ## %store.i.i6836.4 cltq vmovd %xmm3, (%rdx,%rax,4) incl %eax LBB6_359: ## %loopend.i.i6841.4 testb $32, %cl je LBB6_361 ## BB#360: ## %store.i.i6836.5 cltq vpextrd $1, %xmm3, (%rdx,%rax,4) incl %eax LBB6_361: ## %loopend.i.i6841.5 testb $64, %cl je LBB6_363 ## BB#362: ## %store.i.i6836.6 cltq vpextrd $2, %xmm3, (%rdx,%rax,4) incl %eax LBB6_363: ## %loopend.i.i6841.6 testb %cl, %cl jns LBB6_365 ## BB#364: ## %store.i.i6836.7 cltq vpextrd $3, %xmm3, (%rdx,%rax,4) incl %eax LBB6_365: ## %packed_store_active___un_3C_uni_3E_vyi.exit6843 movq -112(%rsp), %rcx ## 8-byte Reload addl %ecx, %eax movq %rax, -112(%rsp) ## 8-byte Spill LBB6_406: movq 320(%rsp), %rax LBB6_407: ## %foreach_reset movq -96(%rsp), %rcx ## 8-byte Reload movl %ecx, (%rax) movq -104(%rsp), %rcx ## 8-byte Reload subl %edi, %ecx movl %ecx, 4(%rax) subl %ebp, %r13d movl %r13d, 8(%rax) movq -112(%rsp), %rcx ## 8-byte Reload subl %ebx, %ecx jmp LBB6_504 LBB6_367: vmovdqa -64(%rsp), %xmm3 ## 16-byte Reload jmp LBB6_386 LBB6_368: ## %cif_test_mixed3524 movq -96(%rsp), %rax ## 8-byte Reload cltq leaq (%rbx,%rax,4), %rax xorl %ecx, %ecx testb $1, %sil je LBB6_370 ## BB#369: ## %store.i.i6177 vmovd %xmm5, (%rax) movl $1, %ecx LBB6_370: ## %loopend.i.i6182 testb $2, %sil je 
LBB6_372 ## BB#371: ## %store.i.i6177.1 vpextrd $1, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_372: ## %loopend.i.i6182.1 testb $4, %sil je LBB6_374 ## BB#373: ## %store.i.i6177.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_374: ## %loopend.i.i6182.2 testb $8, %sil je LBB6_376 ## BB#375: ## %store.i.i6177.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_376: ## %loopend.i.i6182.3 testb $16, %sil je LBB6_378 ## BB#377: ## %store.i.i6177.4 movslq %ecx, %rcx vmovd %xmm3, (%rax,%rcx,4) incl %ecx LBB6_378: ## %loopend.i.i6182.4 testb $32, %sil je LBB6_380 ## BB#379: ## %store.i.i6177.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_380: ## %loopend.i.i6182.5 testb $64, %sil je LBB6_382 ## BB#381: ## %store.i.i6177.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_382: ## %loopend.i.i6182.6 testb %sil, %sil jns LBB6_385 ## BB#383: ## %store.i.i6177.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rax,%rcx,4) LBB6_384: ## %cif_done3493 incl %ecx LBB6_385: ## %cif_done3493 movq -96(%rsp), %rax ## 8-byte Reload addl %eax, %ecx movq %rcx, -96(%rsp) ## 8-byte Spill cmpl $255, %edx jne LBB6_389 LBB6_386: ## %cif_mask_all3600 vmovmskps %ymm14, %esi testl %esi, %esi je LBB6_426 ## BB#387: ## %cif_mask_all3600 cmpl $255, %esi jne LBB6_408 ## BB#388: ## %all_on.i.i6109 movq -104(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm5, 16(%rbx,%rax,4) vmovdqu %xmm5, (%rbx,%rax,4) movl $8, %ecx jmp LBB6_425 LBB6_389: ## %cif_mask_mixed3601 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm14, %ymm1, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_429 ## BB#390: ## %loop.i.i6057.preheader movq -104(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%rbx,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %al je LBB6_392 ## BB#391: ## %store.i.i6061 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_392: ## %loopend.i.i6066 testb $2, %al je LBB6_394 ## BB#393: ## %store.i.i6061.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_394: ## %loopend.i.i6066.1 testb $4, %al je LBB6_396 ## BB#395: ## %store.i.i6061.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_396: ## %loopend.i.i6066.2 testb $8, %al je LBB6_398 ## BB#397: ## %store.i.i6061.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_398: ## %loopend.i.i6066.3 testb $16, %al je LBB6_400 ## BB#399: ## %store.i.i6061.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_400: ## %loopend.i.i6066.4 testb $32, %al je LBB6_402 ## BB#401: ## %store.i.i6061.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_402: ## %loopend.i.i6066.5 testb $64, %al je LBB6_404 ## BB#403: ## %store.i.i6061.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_404: ## %loopend.i.i6066.6 testb %al, %al jns LBB6_425 ## BB#405: ## %store.i.i6061.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) jmp LBB6_424 LBB6_408: ## %cif_test_mixed3633 movq -104(%rsp), %rax ## 8-byte Reload cltq leaq (%rbx,%rax,4), %rax xorl %ecx, %ecx testb $1, %sil je LBB6_410 ## BB#409: ## %store.i.i6089 vmovd %xmm5, (%rax) movl $1, %ecx LBB6_410: ## %loopend.i.i6094 testb $2, %sil je LBB6_412 ## BB#411: ## %store.i.i6089.1 vpextrd $1, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_412: ## %loopend.i.i6094.1 testb $4, %sil je LBB6_414 ## BB#413: ## %store.i.i6089.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_414: ## 
%loopend.i.i6094.2 testb $8, %sil je LBB6_416 ## BB#415: ## %store.i.i6089.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_416: ## %loopend.i.i6094.3 testb $16, %sil je LBB6_418 ## BB#417: ## %store.i.i6089.4 movslq %ecx, %rcx vmovd %xmm3, (%rax,%rcx,4) incl %ecx LBB6_418: ## %loopend.i.i6094.4 testb $32, %sil je LBB6_420 ## BB#419: ## %store.i.i6089.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_420: ## %loopend.i.i6094.5 testb $64, %sil je LBB6_422 ## BB#421: ## %store.i.i6089.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_422: ## %loopend.i.i6094.6 testb %sil, %sil jns LBB6_425 ## BB#423: ## %store.i.i6089.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rax,%rcx,4) LBB6_424: ## %cif_done3602 incl %ecx LBB6_425: ## %cif_done3602 movq -104(%rsp), %rax ## 8-byte Reload addl %eax, %ecx movq %rcx, -104(%rsp) ## 8-byte Spill cmpl $255, %edx jne LBB6_429 LBB6_426: ## %cif_mask_all3709 vmovmskps %ymm9, %esi testl %esi, %esi je LBB6_464 ## BB#427: ## %cif_mask_all3709 movslq %r15d, %rax cmpl $255, %esi jne LBB6_446 ## BB#428: ## %all_on.i.i6017 vextractf128 $1, %ymm5, 16(%rbx,%rax,4) vmovdqu %xmm5, (%rbx,%rax,4) movl $8, %ecx jmp LBB6_463 LBB6_429: ## %cif_mask_mixed3710 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm9, %ymm1, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_467 ## BB#430: ## %loop.i.i5968.preheader movslq %r15d, %rcx leaq (%rbx,%rcx,4), %rsi xorl %ecx, %ecx testb $1, %al je LBB6_432 ## BB#431: ## %store.i.i5972 vmovd %xmm5, (%rsi) movl $1, %ecx LBB6_432: ## %loopend.i.i5977 testb $2, %al je LBB6_434 ## BB#433: ## %store.i.i5972.1 vpextrd $1, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_434: ## %loopend.i.i5977.1 testb $4, %al je LBB6_436 ## BB#435: ## %store.i.i5972.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_436: ## %loopend.i.i5977.2 testb $8, %al je LBB6_438 ## BB#437: ## %store.i.i5972.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rsi,%rcx,4) incl %ecx LBB6_438: ## %loopend.i.i5977.3 testb $16, %al je LBB6_440 ## BB#439: ## %store.i.i5972.4 movslq %ecx, %rcx vmovd %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_440: ## %loopend.i.i5977.4 testb $32, %al je LBB6_442 ## BB#441: ## %store.i.i5972.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_442: ## %loopend.i.i5977.5 testb $64, %al je LBB6_444 ## BB#443: ## %store.i.i5972.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rsi,%rcx,4) incl %ecx LBB6_444: ## %loopend.i.i5977.6 testb %al, %al jns LBB6_463 ## BB#445: ## %store.i.i5972.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rsi,%rcx,4) jmp LBB6_462 LBB6_446: ## %cif_test_mixed3742 leaq (%rbx,%rax,4), %rax xorl %ecx, %ecx testb $1, %sil je LBB6_448 ## BB#447: ## %store.i.i5997 vmovd %xmm5, (%rax) movl $1, %ecx LBB6_448: ## %loopend.i.i6002 testb $2, %sil je LBB6_450 ## BB#449: ## %store.i.i5997.1 vpextrd $1, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_450: ## %loopend.i.i6002.1 testb $4, %sil je LBB6_452 ## BB#451: ## %store.i.i5997.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_452: ## %loopend.i.i6002.2 testb $8, %sil je LBB6_454 ## BB#453: ## %store.i.i5997.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_454: ## %loopend.i.i6002.3 testb $16, %sil je LBB6_456 ## BB#455: ## %store.i.i5997.4 movslq %ecx, %rcx vmovd %xmm3, (%rax,%rcx,4) incl %ecx LBB6_456: ## %loopend.i.i6002.4 testb $32, %sil je LBB6_458 ## BB#457: ## 
%store.i.i5997.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_458: ## %loopend.i.i6002.5 testb $64, %sil je LBB6_460 ## BB#459: ## %store.i.i5997.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_460: ## %loopend.i.i6002.6 testb %sil, %sil jns LBB6_463 ## BB#461: ## %store.i.i5997.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rax,%rcx,4) LBB6_462: ## %cif_done3711 incl %ecx LBB6_463: ## %cif_done3711 addl %r15d, %ecx movl %ecx, %r15d cmpl $255, %edx jne LBB6_467 LBB6_464: ## %cif_mask_all3818 vmovmskps %ymm2, %edx testl %edx, %edx je LBB6_484 ## BB#465: ## %cif_mask_all3818 cmpl $255, %edx jne LBB6_485 ## BB#466: ## %packed_store_active___un_3C_uni_3E_vyi.exit5945 movq -112(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rax vextractf128 $1, %ymm5, 16(%rbx,%rax,4) vmovdqu %xmm5, (%rbx,%rax,4) addl $8, %ecx movq %rcx, -112(%rsp) ## 8-byte Spill jmp LBB6_484 LBB6_467: ## %cif_mask_mixed3819 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm2, %ymm1, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB6_484 ## BB#468: ## %loop.i.i.preheader movq -112(%rsp), %rcx ## 8-byte Reload movslq %ecx, %rcx leaq (%rbx,%rcx,4), %rdx xorl %ecx, %ecx testb $1, %al je LBB6_470 ## BB#469: ## %store.i.i vmovd %xmm5, (%rdx) movl $1, %ecx LBB6_470: ## %loopend.i.i testb $2, %al je LBB6_472 ## BB#471: ## %store.i.i.1 vpextrd $1, %xmm5, (%rdx,%rcx,4) incl %ecx LBB6_472: ## %loopend.i.i.1 testb $4, %al je LBB6_474 ## BB#473: ## %store.i.i.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rdx,%rcx,4) incl %ecx LBB6_474: ## %loopend.i.i.2 testb $8, %al je LBB6_476 ## BB#475: ## %store.i.i.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rdx,%rcx,4) incl %ecx LBB6_476: ## %loopend.i.i.3 testb $16, %al je LBB6_478 ## BB#477: ## %store.i.i.4 movslq %ecx, %rcx vmovd %xmm3, (%rdx,%rcx,4) incl %ecx LBB6_478: ## %loopend.i.i.4 testb $32, %al je LBB6_480 ## BB#479: ## %store.i.i.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rdx,%rcx,4) incl %ecx LBB6_480: ## %loopend.i.i.5 testb $64, %al je LBB6_482 ## BB#481: ## %store.i.i.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rdx,%rcx,4) incl %ecx LBB6_482: ## %loopend.i.i.6 testb %al, %al jns LBB6_502 ## BB#483: ## %store.i.i.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rdx,%rcx,4) jmp LBB6_501 LBB6_484: movq 320(%rsp), %rax movq %r15, %r14 jmp LBB6_503 LBB6_485: ## %cif_test_mixed3851 movq -112(%rsp), %rax ## 8-byte Reload cltq leaq (%rbx,%rax,4), %rax xorl %ecx, %ecx testb $1, %dl je LBB6_487 ## BB#486: ## %store.i.i5903 vmovd %xmm5, (%rax) movl $1, %ecx LBB6_487: ## %loopend.i.i5908 testb $2, %dl je LBB6_489 ## BB#488: ## %store.i.i5903.1 vpextrd $1, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_489: ## %loopend.i.i5908.1 testb $4, %dl je LBB6_491 ## BB#490: ## %store.i.i5903.2 movslq %ecx, %rcx vpextrd $2, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_491: ## %loopend.i.i5908.2 testb $8, %dl je LBB6_493 ## BB#492: ## %store.i.i5903.3 movslq %ecx, %rcx vpextrd $3, %xmm5, (%rax,%rcx,4) incl %ecx LBB6_493: ## %loopend.i.i5908.3 testb $16, %dl je LBB6_495 ## BB#494: ## %store.i.i5903.4 movslq %ecx, %rcx vmovd %xmm3, (%rax,%rcx,4) incl %ecx LBB6_495: ## %loopend.i.i5908.4 testb $32, %dl je LBB6_497 ## BB#496: ## %store.i.i5903.5 movslq %ecx, %rcx vpextrd $1, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_497: ## %loopend.i.i5908.5 testb $64, %dl je LBB6_499 ## BB#498: ## %store.i.i5903.6 movslq %ecx, %rcx vpextrd $2, %xmm3, (%rax,%rcx,4) incl %ecx LBB6_499: ## 
%loopend.i.i5908.6 testb %dl, %dl jns LBB6_502 ## BB#500: ## %store.i.i5903.7 movslq %ecx, %rcx vpextrd $3, %xmm3, (%rax,%rcx,4) LBB6_501: incl %ecx LBB6_502: movq 320(%rsp), %rax movq %r15, %r14 movq -112(%rsp), %rsi ## 8-byte Reload addl %esi, %ecx movq %rcx, -112(%rsp) ## 8-byte Spill LBB6_503: ## %foreach_reset2007 movq -96(%rsp), %rcx ## 8-byte Reload movl %ecx, (%rax) movq -104(%rsp), %rcx ## 8-byte Reload subl %edi, %ecx movl %ecx, 4(%rax) subl %ebp, %r14d movl %r14d, 8(%rax) movq -112(%rsp), %rcx ## 8-byte Reload subl %r8d, %ecx LBB6_504: ## %foreach_reset movl %ecx, 12(%rax) addq $200, %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .section __TEXT,__literal4,4byte_literals .p2align 2 LCPI7_0: .long 1056964608 ## float 0.5 LCPI7_1: .long 1077936128 ## float 3 .section __TEXT,__literal16,16byte_literals .p2align 4 LCPI7_2: .long 0 ## 0x0 .long 1 ## 0x1 .long 2 ## 0x2 .long 3 ## 0x3 LCPI7_3: .long 4 ## 0x4 .long 5 ## 0x5 .long 6 ## 0x6 .long 7 ## 0x7 LCPI7_4: .byte 0 ## 0x0 .byte 1 ## 0x1 .byte 4 ## 0x4 .byte 5 ## 0x5 .byte 8 ## 0x8 .byte 9 ## 0x9 .byte 12 ## 0xc .byte 13 ## 0xd .byte 8 ## 0x8 .byte 9 ## 0x9 .byte 12 ## 0xc .byte 13 ## 0xd .byte 12 ## 0xc .byte 13 ## 0xd .byte 14 ## 0xe .byte 15 ## 0xf .section __TEXT,__text,regular,pure_instructions .globl _IntersectLightsWithTileMinMax .p2align 4, 0x90 _IntersectLightsWithTileMinMax: ## @IntersectLightsWithTileMinMax ## BB#0: ## %all_on pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $264, %rsp ## imm = 0x108 vmovaps %xmm1, 16(%rsp) ## 16-byte Spill vmovaps %xmm0, (%rsp) ## 16-byte Spill movq 360(%rsp), %r10 movq 352(%rsp), %r15 movq 344(%rsp), %r13 movq 336(%rsp), %r11 movq 328(%rsp), %r14 movl 320(%rsp), %r12d vxorps %xmm0, %xmm0, %xmm0 vcvtsi2ssl %r8d, %xmm0, %xmm0 vmovss LCPI7_0(%rip), %xmm9 ## xmm9 = mem[0],zero,zero,zero vmulss %xmm9, %xmm0, %xmm5 vxorps %xmm0, %xmm0, %xmm0 vcvtsi2ssl %r9d, %xmm0, %xmm0 vmulss %xmm9, %xmm0, %xmm6 vmulss %xmm2, %xmm5, %xmm4 vxorps %xmm0, %xmm0, %xmm0 vsubss %xmm4, %xmm0, %xmm7 vmulss %xmm3, %xmm6, %xmm2 vsubss %xmm2, %xmm0, %xmm8 vxorps %xmm0, %xmm0, %xmm0 vcvtsi2ssl %esi, %xmm0, %xmm3 vsubss %xmm5, %xmm3, %xmm0 negl %edi vcvtsi2ssl %edi, %xmm0, %xmm3 vaddss %xmm5, %xmm3, %xmm1 vcvtsi2ssl %ecx, %xmm0, %xmm3 vsubss %xmm6, %xmm3, %xmm5 negl %edx vcvtsi2ssl %edx, %xmm0, %xmm3 vaddss %xmm6, %xmm3, %xmm10 vmulss %xmm7, %xmm7, %xmm6 vmulss %xmm0, %xmm0, %xmm3 vaddss %xmm3, %xmm6, %xmm3 vrsqrtss %xmm3, %xmm0, %xmm6 vmulss %xmm3, %xmm6, %xmm3 vmulss %xmm3, %xmm6, %xmm3 vmovss LCPI7_1(%rip), %xmm11 ## xmm11 = mem[0],zero,zero,zero vsubss %xmm3, %xmm11, %xmm3 vmulss %xmm3, %xmm6, %xmm3 vmulss %xmm9, %xmm3, %xmm3 vmulss %xmm3, %xmm7, %xmm6 vmovaps %xmm6, -112(%rsp) ## 16-byte Spill vmulss %xmm3, %xmm0, %xmm0 vmovaps %xmm0, -128(%rsp) ## 16-byte Spill vmulss %xmm4, %xmm4, %xmm0 vmulss %xmm1, %xmm1, %xmm3 vaddss %xmm3, %xmm0, %xmm0 vrsqrtss %xmm0, %xmm0, %xmm3 vmulss %xmm0, %xmm3, %xmm0 vmulss %xmm0, %xmm3, %xmm0 vsubss %xmm0, %xmm11, %xmm0 vmulss %xmm0, %xmm3, %xmm0 vmulss %xmm9, %xmm0, %xmm0 vmulss %xmm0, %xmm4, %xmm3 vmovaps %xmm3, -80(%rsp) ## 16-byte Spill vmulss %xmm0, %xmm1, %xmm0 vmovaps %xmm0, -64(%rsp) ## 16-byte Spill vmulss %xmm2, %xmm2, %xmm0 vmulss %xmm5, %xmm5, %xmm1 vaddss %xmm1, %xmm0, %xmm0 vrsqrtss %xmm0, %xmm0, %xmm1 vmulss %xmm0, %xmm1, %xmm0 vmulss %xmm0, %xmm1, %xmm0 vsubss %xmm0, %xmm11, %xmm0 vmulss %xmm0, %xmm1, %xmm0 vmulss %xmm9, %xmm0, %xmm0 vmulss %xmm0, %xmm2, %xmm1 vmovaps %xmm1, -48(%rsp) ## 16-byte Spill vmulss 
%xmm0, %xmm5, %xmm0 vmovaps %xmm0, -96(%rsp) ## 16-byte Spill vmulss %xmm8, %xmm8, %xmm0 vmulss %xmm10, %xmm10, %xmm1 vaddss %xmm1, %xmm0, %xmm0 vrsqrtss %xmm0, %xmm0, %xmm1 vmulss %xmm0, %xmm1, %xmm0 vmulss %xmm0, %xmm1, %xmm0 vsubss %xmm0, %xmm11, %xmm0 vmulss %xmm0, %xmm1, %xmm0 vmulss %xmm9, %xmm0, %xmm0 vmulss %xmm0, %xmm8, %xmm1 vmovaps %xmm1, -16(%rsp) ## 16-byte Spill vmulss %xmm0, %xmm10, %xmm0 vmovaps %xmm0, -32(%rsp) ## 16-byte Spill movl %r12d, %edx sarl $31, %edx shrl $29, %edx addl %r12d, %edx andl $-8, %edx xorl %eax, %eax movl $0, %ecx testl %edx, %edx jle LBB7_22 ## BB#1: ## %foreach_full_body.lr.ph vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm10 vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm11 vpermilps $0, -128(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 224(%rsp) ## 32-byte Spill vpermilps $0, -112(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 192(%rsp) ## 32-byte Spill vpermilps $0, -64(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 160(%rsp) ## 32-byte Spill vpermilps $0, -80(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 128(%rsp) ## 32-byte Spill vpermilps $0, -96(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 96(%rsp) ## 32-byte Spill vpermilps $0, -48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vpermilps $0, -32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 32(%rsp) ## 32-byte Spill vpermilps $0, -16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm9 xorl %esi, %esi vxorps %ymm8, %ymm8, %ymm8 vmovdqa LCPI7_4(%rip), %xmm3 ## xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] xorl %eax, %eax xorl %ecx, %ecx .p2align 4, 0x90 LBB7_2: ## %foreach_full_body ## =>This Inner Loop Header: Depth=1 vmovd %ecx, %xmm0 vpshufd $0, %xmm0, %xmm7 ## xmm7 = xmm0[0,0,0,0] vpaddd LCPI7_2(%rip), %xmm7, %xmm12 movslq %esi, %rdi vmovups (%r13,%rdi), %xmm0 vinsertf128 $1, 16(%r13,%rdi), %ymm0, %ymm0 vmovups (%r15,%rdi), %xmm1 vinsertf128 $1, 16(%r15,%rdi), %ymm1, %ymm1 vsubps %ymm1, %ymm8, %ymm6 vsubps %ymm10, %ymm0, %ymm1 vcmpnltps %ymm6, %ymm1, %ymm1 vsubps %ymm0, %ymm11, %ymm13 vcmpnltps %ymm6, %ymm13, %ymm13 vandps %ymm1, %ymm13, %ymm1 vmovmskps %ymm1, %ebp testl %ebp, %ebp je LBB7_29 ## BB#3: ## %if_then ## in Loop: Header=BB7_2 Depth=1 vpaddd LCPI7_3(%rip), %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm12, %ymm7 vmovups (%r14,%rdi), %xmm12 vinsertf128 $1, 16(%r14,%rdi), %ymm12, %ymm12 vmovups (%r11,%rdi), %xmm13 vinsertf128 $1, 16(%r11,%rdi), %ymm13, %ymm13 vmulps 224(%rsp), %ymm0, %ymm14 ## 32-byte Folded Reload vmulps 192(%rsp), %ymm12, %ymm15 ## 32-byte Folded Reload vaddps %ymm15, %ymm14, %ymm14 vcmpnltps %ymm6, %ymm14, %ymm14 vextractf128 $1, %ymm14, %xmm4 vpshufb %xmm3, %xmm4, %xmm4 vpshufb %xmm3, %xmm14, %xmm5 vpunpcklqdq %xmm4, %xmm5, %xmm4 ## xmm4 = xmm5[0],xmm4[0] vmulps 160(%rsp), %ymm0, %ymm5 ## 32-byte Folded Reload vmulps 128(%rsp), %ymm12, %ymm12 ## 32-byte Folded Reload vaddps %ymm12, %ymm5, %ymm5 vcmpnltps %ymm6, %ymm5, 
%ymm5 vextractf128 $1, %ymm5, %xmm2 vpshufb %xmm3, %xmm2, %xmm2 vpshufb %xmm3, %xmm5, %xmm5 vpunpcklqdq %xmm2, %xmm5, %xmm2 ## xmm2 = xmm5[0],xmm2[0] vpand %xmm4, %xmm2, %xmm2 vmulps 96(%rsp), %ymm0, %ymm4 ## 32-byte Folded Reload vmulps 64(%rsp), %ymm13, %ymm5 ## 32-byte Folded Reload vaddps %ymm5, %ymm4, %ymm4 vcmpnltps %ymm6, %ymm4, %ymm4 vextractf128 $1, %ymm4, %xmm5 vpshufb %xmm3, %xmm5, %xmm5 vpshufb %xmm3, %xmm4, %xmm4 vpunpcklqdq %xmm5, %xmm4, %xmm4 ## xmm4 = xmm4[0],xmm5[0] vmulps 32(%rsp), %ymm0, %ymm0 ## 32-byte Folded Reload vmulps %ymm9, %ymm13, %ymm5 vaddps %ymm5, %ymm0, %ymm0 vcmpnltps %ymm6, %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm5 vpshufb %xmm3, %xmm5, %xmm5 vpshufb %xmm3, %xmm0, %xmm0 vpunpcklqdq %xmm5, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm5[0] vpand %xmm0, %xmm4, %xmm0 vpand %xmm0, %xmm2, %xmm0 vpmovzxwd %xmm0, %xmm2 ## xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vpslld $31, %xmm2, %xmm2 vpunpckhwd %xmm0, %xmm0, %xmm0 ## xmm0 = xmm0[4,4,5,5,6,6,7,7] vpslld $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm2, %ymm0 vblendvps %ymm0, %ymm1, %ymm8, %ymm0 vmovmskps %ymm0, %ebp cmpl $255, %ebp je LBB7_28 ## BB#4: ## %if_then ## in Loop: Header=BB7_2 Depth=1 testl %ebp, %ebp je LBB7_29 ## BB#5: ## %cif_test_mixed ## in Loop: Header=BB7_2 Depth=1 movslq %eax, %rdi leaq (%r10,%rdi,4), %rbx xorl %edi, %edi testb $1, %bpl je LBB7_7 ## BB#6: ## %store.i.i1853 ## in Loop: Header=BB7_2 Depth=1 movl %ecx, (%rbx) movl $1, %edi LBB7_7: ## %loopend.i.i1858 ## in Loop: Header=BB7_2 Depth=1 testb $2, %bpl je LBB7_9 ## BB#8: ## %store.i.i1853.1 ## in Loop: Header=BB7_2 Depth=1 vpextrd $1, %xmm7, (%rbx,%rdi,4) incl %edi LBB7_9: ## %loopend.i.i1858.1 ## in Loop: Header=BB7_2 Depth=1 testb $4, %bpl je LBB7_11 ## BB#10: ## %store.i.i1853.2 ## in Loop: Header=BB7_2 Depth=1 movslq %edi, %rdi vpextrd $2, %xmm7, (%rbx,%rdi,4) incl %edi LBB7_11: ## %loopend.i.i1858.2 ## in Loop: Header=BB7_2 Depth=1 testb $8, %bpl je LBB7_13 ## BB#12: ## %store.i.i1853.3 ## in Loop: Header=BB7_2 Depth=1 movslq %edi, %rdi vpextrd $3, %xmm7, (%rbx,%rdi,4) incl %edi LBB7_13: ## %loopend.i.i1858.3 ## in Loop: Header=BB7_2 Depth=1 testb $16, %bpl je LBB7_15 ## BB#14: ## %store.i.i1853.4 ## in Loop: Header=BB7_2 Depth=1 vextractf128 $1, %ymm7, %xmm0 movslq %edi, %rdi vmovd %xmm0, (%rbx,%rdi,4) incl %edi LBB7_15: ## %loopend.i.i1858.4 ## in Loop: Header=BB7_2 Depth=1 testb $32, %bpl je LBB7_17 ## BB#16: ## %store.i.i1853.5 ## in Loop: Header=BB7_2 Depth=1 vextractf128 $1, %ymm7, %xmm0 movslq %edi, %rdi vpextrd $1, %xmm0, (%rbx,%rdi,4) incl %edi LBB7_17: ## %loopend.i.i1858.5 ## in Loop: Header=BB7_2 Depth=1 testb $64, %bpl je LBB7_19 ## BB#18: ## %store.i.i1853.6 ## in Loop: Header=BB7_2 Depth=1 vextractf128 $1, %ymm7, %xmm0 movslq %edi, %rdi vpextrd $2, %xmm0, (%rbx,%rdi,4) incl %edi LBB7_19: ## %loopend.i.i1858.6 ## in Loop: Header=BB7_2 Depth=1 testb %bpl, %bpl jns LBB7_21 ## BB#20: ## %store.i.i1853.7 ## in Loop: Header=BB7_2 Depth=1 vextractf128 $1, %ymm7, %xmm0 movslq %edi, %rdi vpextrd $3, %xmm0, (%rbx,%rdi,4) incl %edi LBB7_21: ## %loopend.i.i1858.7 ## in Loop: Header=BB7_2 Depth=1 addl %eax, %edi movl %edi, %eax jmp LBB7_29 .p2align 4, 0x90 LBB7_28: ## %packed_store_active___un_3C_uni_3E_vyi.exit1817 ## in Loop: Header=BB7_2 Depth=1 cltq vextractf128 $1, %ymm7, 16(%r10,%rax,4) vmovups %xmm7, (%r10,%rax,4) addl $8, %eax LBB7_29: ## %if_exit ## in Loop: Header=BB7_2 Depth=1 addl $8, %ecx addl $32, %esi cmpl %edx, %ecx jl LBB7_2 LBB7_22: ## %partial_inner_all_outer cmpl %r12d, %ecx jge LBB7_65 ## BB#23: ## 
%partial_inner_only vmovd %ecx, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI7_2(%rip), %xmm0, %xmm11 vpaddd LCPI7_3(%rip), %xmm0, %xmm12 vmovd %r12d, %xmm0 vpshufd $0, %xmm0, %xmm14 ## xmm14 = xmm0[0,0,0,0] vpcmpgtd %xmm12, %xmm14, %xmm0 vpcmpgtd %xmm11, %xmm14, %xmm4 vinsertf128 $1, %xmm0, %ymm4, %ymm4 leal (,%rcx,4), %edx movslq %edx, %rdx vmaskmovps (%r13,%rdx), %ymm4, %ymm7 vmaskmovps (%r15,%rdx), %ymm4, %ymm0 vxorps %ymm8, %ymm8, %ymm8 vsubps %ymm0, %ymm8, %ymm6 vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vsubps %ymm0, %ymm7, %ymm0 vcmpnltps %ymm6, %ymm0, %ymm0 vpermilps $0, 16(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vsubps %ymm7, %ymm5, %ymm5 vcmpnltps %ymm6, %ymm5, %ymm5 vandps %ymm0, %ymm5, %ymm15 vblendvps %ymm4, %ymm15, %ymm8, %ymm5 vmovmskps %ymm5, %esi testl %esi, %esi je LBB7_65 ## BB#24: ## %if_then277 vmaskmovps (%r14,%rdx), %ymm4, %ymm9 vpermilps $0, -128(%rsp), %xmm5 ## 16-byte Folded Reload ## xmm5 = mem[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmulps %ymm5, %ymm7, %ymm5 vpermilps $0, -112(%rsp), %xmm10 ## 16-byte Folded Reload ## xmm10 = mem[0,0,0,0] vinsertf128 $1, %xmm10, %ymm10, %ymm10 vmulps %ymm10, %ymm9, %ymm10 vaddps %ymm10, %ymm5, %ymm5 vcmpnltps %ymm6, %ymm5, %ymm10 vextractf128 $1, %ymm10, %xmm2 vmovdqa LCPI7_4(%rip), %xmm5 ## xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm5, %xmm2, %xmm2 vpshufb %xmm5, %xmm10, %xmm3 vpunpcklqdq %xmm2, %xmm3, %xmm10 ## xmm10 = xmm3[0],xmm2[0] vpermilps $0, -80(%rsp), %xmm3 ## 16-byte Folded Reload ## xmm3 = mem[0,0,0,0] vinsertf128 $1, %xmm3, %ymm3, %ymm3 vmulps %ymm3, %ymm9, %ymm3 vpermilps $0, -64(%rsp), %xmm9 ## 16-byte Folded Reload ## xmm9 = mem[0,0,0,0] vinsertf128 $1, %xmm9, %ymm9, %ymm9 vmulps %ymm9, %ymm7, %ymm9 vaddps %ymm3, %ymm9, %ymm3 vcmpnltps %ymm6, %ymm3, %ymm3 vextractf128 $1, %ymm3, %xmm2 vpshufb %xmm5, %xmm2, %xmm2 vpshufb %xmm5, %xmm3, %xmm3 vpunpcklqdq %xmm2, %xmm3, %xmm2 ## xmm2 = xmm3[0],xmm2[0] vmaskmovps (%r11,%rdx), %ymm4, %ymm3 vpermilps $0, -96(%rsp), %xmm9 ## 16-byte Folded Reload ## xmm9 = mem[0,0,0,0] vinsertf128 $1, %xmm9, %ymm9, %ymm9 vmulps %ymm9, %ymm7, %ymm9 vpermilps $0, -48(%rsp), %xmm13 ## 16-byte Folded Reload ## xmm13 = mem[0,0,0,0] vinsertf128 $1, %xmm13, %ymm13, %ymm13 vmulps %ymm13, %ymm3, %ymm13 vaddps %ymm13, %ymm9, %ymm9 vcmpnltps %ymm6, %ymm9, %ymm9 vextractf128 $1, %ymm9, %xmm1 vpshufb %xmm5, %xmm1, %xmm1 vpshufb %xmm5, %xmm9, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] vpand %xmm0, %xmm2, %xmm0 vpand %xmm0, %xmm10, %xmm0 vpermilps $0, -32(%rsp), %xmm1 ## 16-byte Folded Reload ## xmm1 = mem[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vmulps %ymm1, %ymm7, %ymm1 vpermilps $0, -16(%rsp), %xmm2 ## 16-byte Folded Reload ## xmm2 = mem[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vmulps %ymm2, %ymm3, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vcmpnltps %ymm6, %ymm1, %ymm1 vextractf128 $1, %ymm1, %xmm2 vpshufb %xmm5, %xmm2, %xmm2 vpshufb %xmm5, %xmm1, %xmm1 vpunpcklqdq %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0] vpand %xmm1, %xmm0, %xmm0 vpmovzxwd %xmm0, %xmm1 ## xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vpslld $31, %xmm1, %xmm1 vpunpckhwd %xmm0, %xmm0, %xmm0 ## xmm0 = xmm0[4,4,5,5,6,6,7,7] vpslld $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vblendvps %ymm0, %ymm15, %ymm8, %ymm5 vinsertf128 $1, %xmm12, %ymm11, %ymm0 vmovmskps %ymm4, %edx cmpl $255, %edx jne LBB7_30 ## BB#25: ## 
%cif_mask_all336 vmovmskps %ymm5, %esi testl %esi, %esi je LBB7_65 ## BB#26: ## %cif_mask_all336 cmpl $255, %esi jne LBB7_47 ## BB#27: ## %packed_store_active___un_3C_uni_3E_vyi.exit1793 movslq %eax, %rcx vextractf128 $1, %ymm0, 16(%r10,%rcx,4) vmovups %xmm0, (%r10,%rcx,4) addl $8, %eax jmp LBB7_65 LBB7_30: ## %cif_mask_mixed337 vinsertf128 $1, %xmm14, %ymm14, %ymm2 vextractf128 $1, %ymm0, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpcmpgtd %xmm1, %xmm3, %xmm3 vpcmpgtd %xmm0, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vxorps %ymm3, %ymm3, %ymm3 vblendvps %ymm2, %ymm5, %ymm3, %ymm2 vmovmskps %ymm2, %esi testl %esi, %esi je LBB7_65 ## BB#31: ## %loop.i.i1723.preheader movslq %eax, %rdx leaq (%r10,%rdx,4), %rdi xorl %edx, %edx testb $1, %sil je LBB7_33 ## BB#32: ## %store.i.i1727 movl %ecx, (%rdi) movl $1, %edx LBB7_33: ## %loopend.i.i1732 testb $2, %sil je LBB7_35 ## BB#34: ## %store.i.i1727.1 vpextrd $1, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_35: ## %loopend.i.i1732.1 testb $4, %sil je LBB7_37 ## BB#36: ## %store.i.i1727.2 movslq %edx, %rdx vpextrd $2, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_37: ## %loopend.i.i1732.2 testb $8, %sil je LBB7_39 ## BB#38: ## %store.i.i1727.3 movslq %edx, %rdx vpextrd $3, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_39: ## %loopend.i.i1732.3 testb $16, %sil je LBB7_41 ## BB#40: ## %store.i.i1727.4 movslq %edx, %rdx vmovd %xmm1, (%rdi,%rdx,4) incl %edx LBB7_41: ## %loopend.i.i1732.4 testb $32, %sil je LBB7_43 ## BB#42: ## %store.i.i1727.5 movslq %edx, %rdx vpextrd $1, %xmm1, (%rdi,%rdx,4) incl %edx LBB7_43: ## %loopend.i.i1732.5 testb $64, %sil je LBB7_45 ## BB#44: ## %store.i.i1727.6 movslq %edx, %rdx vpextrd $2, %xmm1, (%rdi,%rdx,4) incl %edx LBB7_45: ## %loopend.i.i1732.6 testb %sil, %sil jns LBB7_64 ## BB#46: ## %store.i.i1727.7 movslq %edx, %rdx vpextrd $3, %xmm1, (%rdi,%rdx,4) jmp LBB7_63 LBB7_47: ## %cif_test_mixed368 movslq %eax, %rdx leaq (%r10,%rdx,4), %rdi xorl %edx, %edx testb $1, %sil je LBB7_49 ## BB#48: ## %store.i.i1756 movl %ecx, (%rdi) movl $1, %edx LBB7_49: ## %loopend.i.i1761 testb $2, %sil je LBB7_51 ## BB#50: ## %store.i.i1756.1 vpextrd $1, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_51: ## %loopend.i.i1761.1 testb $4, %sil je LBB7_53 ## BB#52: ## %store.i.i1756.2 movslq %edx, %rdx vpextrd $2, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_53: ## %loopend.i.i1761.2 testb $8, %sil je LBB7_55 ## BB#54: ## %store.i.i1756.3 movslq %edx, %rdx vpextrd $3, %xmm0, (%rdi,%rdx,4) incl %edx LBB7_55: ## %loopend.i.i1761.3 testb $16, %sil je LBB7_57 ## BB#56: ## %store.i.i1756.4 vextractf128 $1, %ymm0, %xmm1 movslq %edx, %rdx vmovd %xmm1, (%rdi,%rdx,4) incl %edx LBB7_57: ## %loopend.i.i1761.4 testb $32, %sil je LBB7_59 ## BB#58: ## %store.i.i1756.5 vextractf128 $1, %ymm0, %xmm1 movslq %edx, %rdx vpextrd $1, %xmm1, (%rdi,%rdx,4) incl %edx LBB7_59: ## %loopend.i.i1761.5 testb $64, %sil je LBB7_61 ## BB#60: ## %store.i.i1756.6 vextractf128 $1, %ymm0, %xmm1 movslq %edx, %rdx vpextrd $2, %xmm1, (%rdi,%rdx,4) incl %edx LBB7_61: ## %loopend.i.i1761.6 testb %sil, %sil jns LBB7_64 ## BB#62: ## %store.i.i1756.7 vextractf128 $1, %ymm0, %xmm0 movslq %edx, %rdx vpextrd $3, %xmm0, (%rdi,%rdx,4) LBB7_63: ## %packed_store_active___un_3C_uni_3E_vyi.exit1734 incl %edx LBB7_64: ## %packed_store_active___un_3C_uni_3E_vyi.exit1734 addl %eax, %edx movl %edx, %eax LBB7_65: ## %foreach_reset addq $264, %rsp ## imm = 0x108 popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .section __TEXT,__literal4,4byte_literals .p2align 2 LCPI8_0: .long 1073741824 ## float 2 LCPI8_1: .long 1056964608 
                                        ## float 0.5
LCPI8_2:
        .long   3212836864              ## float -1
LCPI8_54:
        .long   0                       ## float 0
        .section        __TEXT,__literal16,16byte_literals
        .p2align        4
LCPI8_3:
        .long   0                       ## 0x0
        .long   1                       ## 0x1
        .long   2                       ## 0x2
        .long   3                       ## 0x3
LCPI8_4:
        .long   4                       ## 0x4
        .long   5                       ## 0x5
        .long   6                       ## 0x6
        .long   7                       ## 0x7
LCPI8_10:
        .long   939524096               ## 0x38000000
        .long   939524096               ## 0x38000000
        .long   939524096               ## 0x38000000
        .long   939524096               ## 0x38000000
LCPI8_12:
        .long   947912704               ## 0x38800000
        .long   947912704               ## 0x38800000
        .long   947912704               ## 0x38800000
        .long   947912704               ## 0x38800000
LCPI8_15:
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
LCPI8_23:
        .long   4294967170              ## 0xffffff82
        .long   4294967170              ## 0xffffff82
        .long   4294967170              ## 0xffffff82
        .long   4294967170              ## 0xffffff82
LCPI8_46:
        .long   127                     ## 0x7f
        .long   127                     ## 0x7f
        .long   127                     ## 0x7f
        .long   127                     ## 0x7f
LCPI8_47:
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
        .long   1                       ## 0x1
LCPI8_51:
        .byte   0                       ## 0x0
        .byte   1                       ## 0x1
        .byte   4                       ## 0x4
        .byte   5                       ## 0x5
        .byte   8                       ## 0x8
        .byte   9                       ## 0x9
        .byte   12                      ## 0xc
        .byte   13                      ## 0xd
        .byte   8                       ## 0x8
        .byte   9                       ## 0x9
        .byte   12                      ## 0xc
        .byte   13                      ## 0xd
        .byte   12                      ## 0xc
        .byte   13                      ## 0xd
        .byte   14                      ## 0xe
        .byte   15                      ## 0xf
LCPI8_52:
        .byte   0                       ## 0x0
        .byte   2                       ## 0x2
        .byte   4                       ## 0x4
        .byte   6                       ## 0x6
        .byte   8                       ## 0x8
        .byte   10                      ## 0xa
        .byte   12                      ## 0xc
        .byte   14                      ## 0xe
        .space  1
        .space  1
        .space  1
        .space  1
        .space  1
        .space  1
        .space  1
        .space  1
LCPI8_53:
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .byte   255                     ## 0xff
        .byte   0                       ## 0x0
        .section        __TEXT,__const
        .p2align        5
LCPI8_5:
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
        .long   1056964608              ## float 0.5
LCPI8_6:
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
        .long   3212836864              ## float -1
LCPI8_7:
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
        .long   1077936128              ## float 3
LCPI8_8:
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
        .long   32767                   ## 0x7fff
LCPI8_9:
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
        .long   260046848               ## 0xf800000
LCPI8_11:
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
        .long   1879048192              ## 0x70000000
LCPI8_13:
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
        .long   3095396352              ## float -6.10351563E-5
LCPI8_14:
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
        .long   32768                   ## 0x8000
LCPI8_16:
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
        .long   1082130432              ## float 4
LCPI8_17:
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
        .long   3221225472              ## float -2
LCPI8_18:
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
        .long   1090519040              ## float 8
LCPI8_19:
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
        .long   998277249               ## float 0.00392156886
LCPI8_20:
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
        .long   1073741824              ## float 2
LCPI8_21:
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
        .long   1040187392              ## float 0.125
LCPI8_22:
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
        .long   1065353216              ## float 1
LCPI8_24:
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
        .long   2155872255              ## 0x807fffff
LCPI8_25:
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
        .long   1071637134              ## float 1.7491014
LCPI8_26:
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
        .long   3223280375              ## float -2.48992705
LCPI8_27:
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
        .long   1073611155              ## float 1.98442304
LCPI8_28:
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
        .long   3206119809              ## float -0.599632323
LCPI8_29:
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
        .long   1051677837              ## float 0.342441946
LCPI8_30:
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
        .long   1043570863              ## float 0.175417647
LCPI8_31:
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
        .long   1048640391              ## float 0.251919001
LCPI8_32:
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
        .long   1051369756              ## float 0.333260417
LCPI8_33:
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
        .long   1056964624              ## float 0.500000954
LCPI8_34:
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
        .long   1060205080              ## float 0.693147182
LCPI8_35:
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
        .long   4286578688              ## float -Inf
LCPI8_36:
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
        .long   2143289344              ## float NaN
LCPI8_37:
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
        .long   1069066811              ## float 1.44269502
LCPI8_38:
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
        .long   1060205056              ## float 0.693145751
LCPI8_39:
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
        .long   901758606               ## float 1.42860677E-6
LCPI8_40:
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
        .long   965769269               ## float 2.75553815E-4
LCPI8_41:
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
        .long   984283061               ## float 0.00130437932
LCPI8_42:
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
        .long   1007241053              ## float 0.00837883074
LCPI8_43:
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
        .long   1026202953              ## float 0.0416539051
LCPI8_44:
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
        .long   1042983712              ## float 0.166668415
LCPI8_45:
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
        .long   1056964605              ## float 0.499999911
LCPI8_48:
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
        .long   2139095040              ## float +Inf
LCPI8_49:
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
        .long   1055439406              ## float 0.454545438
LCPI8_50:
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
        .long   1132396544              ## float 255
LCPI8_55:
        .space  32
        .section        __TEXT,__text,regular,pure_instructions
        .globl  _ShadeTile
        .p2align        4, 0x90
_ShadeTile:                             ## @ShadeTile
## BB#0:                                ## %all_on
        pushq   %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        subq    $2600, %rsp             ## imm = 0xA28
        movl    %r9d, %eax
        movl    %r8d, -112(%rsp)        ## 4-byte Spill
        movl    %ecx, -100(%rsp)        ## 4-byte Spill
        movl    %esi, -104(%rsp)        ## 4-byte Spill
        movq    2704(%rsp), %rbx
        movq    2696(%rsp), %r9
        movq    2688(%rsp), %r15
        movl    2672(%rsp), %r8d
        testl   %r8d, %r8d
        je      LBB8_2
## BB#1:                                ## %all_on
        testb   $1, 2680(%rsp)
        jne     LBB8_2
## BB#59:                               ## %if_else
        cmpl    -100(%rsp), %edx        ## 4-byte Folded Reload
        jge     LBB8_71
## BB#60:                               ## %for_loop121.lr.ph
        movq    2656(%rsp), %r11
        vcvtsi2ssl      %eax, %xmm0, %xmm4
        vmovss  LCPI8_0(%rip), %xmm5    ## xmm5 = mem[0],zero,zero,zero
        vdivss  %xmm4, %xmm5, %xmm4
        vmovss  %xmm4, 252(%rsp)        ## 4-byte Spill
        vcvtsi2ssl      -112(%rsp), %xmm0, %xmm4 ## 4-byte Folded Reload
        vdivss  %xmm4, %xmm5, %xmm4
        movl    -104(%rsp), %esi        ## 4-byte Reload
        movl    %esi, %eax
        subl    %edi, %eax
        movl    %eax, %ecx
        sarl    $31, %ecx
        shrl    $29, %ecx
        addl    %eax, %ecx
        andl    $-8, %ecx
        subl    %ecx, %eax
        movl    %esi, %ecx
        subl    %eax, %ecx
        movl    %ecx, 60(%rsp)          ## 4-byte Spill
        vmovd   %esi, %xmm5
        vpshufd $0, %xmm5, %xmm5        ## xmm5
= xmm5[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 vmovups %ymm5, 2080(%rsp) ## 32-byte Spill vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmovups %ymm4, 1920(%rsp) ## 32-byte Spill vpermilps $0, %xmm3, %xmm3 ## xmm3 = xmm3[0,0,0,0] vinsertf128 $1, %xmm3, %ymm3, %ymm3 vmovups %ymm3, 1888(%rsp) ## 32-byte Spill vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vmovups %ymm2, 1856(%rsp) ## 32-byte Spill vpermilps $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 1824(%rsp) ## 32-byte Spill vpermilps $0, %xmm1, %xmm0 ## xmm0 = xmm1[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 1792(%rsp) ## 32-byte Spill vpxor %xmm8, %xmm8, %xmm8 ## implicit-def: %YMM3 ## implicit-def: %XMM6 ## implicit-def: %XMM0 vmovaps %xmm0, 32(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, 16(%rsp) ## 16-byte Spill ## implicit-def: %XMM0 vmovaps %xmm0, 112(%rsp) ## 16-byte Spill ## implicit-def: %XMM10 ## implicit-def: %YMM4 ## implicit-def: %YMM5 ## implicit-def: %YMM14 ## implicit-def: %XMM0 vmovaps %xmm0, -96(%rsp) ## 16-byte Spill ## implicit-def: %XMM7 ## implicit-def: %YMM0 vmovups %ymm0, 1696(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1664(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1568(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1536(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1760(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1728(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1632(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1600(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1504(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1472(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1440(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1408(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1376(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1344(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 2048(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 2016(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovups %ymm0, 1984(%rsp) ## 32-byte Spill ## implicit-def: %YMM0 vmovdqu %ymm0, 1952(%rsp) ## 32-byte Spill movl %edi, -108(%rsp) ## 4-byte Spill .p2align 4, 0x90 LBB8_61: ## %for_loop121 ## =>This Loop Header: Depth=1 ## Child Loop BB8_66 Depth 2 ## Child Loop BB8_73 Depth 3 ## Child Loop BB8_271 Depth 2 vxorps %xmm0, %xmm0, %xmm0 vcvtsi2ssl %edx, %xmm0, %xmm0 vaddss LCPI8_1(%rip), %xmm0, %xmm0 vmulss 252(%rsp), %xmm0, %xmm0 ## 4-byte Folded Reload vaddss LCPI8_2(%rip), %xmm0, %xmm0 vxorps %xmm1, %xmm1, %xmm1 vsubss %xmm0, %xmm1, %xmm1 cmpl %edi, 60(%rsp) ## 4-byte Folded Reload movl %edx, -68(%rsp) ## 4-byte Spill vmovdqu %ymm3, 640(%rsp) ## 32-byte Spill vmovdqa %xmm6, 560(%rsp) ## 16-byte Spill vmovdqa %xmm10, 544(%rsp) ## 16-byte Spill vmovups %ymm4, 480(%rsp) ## 32-byte Spill vmovdqu %ymm5, 448(%rsp) ## 32-byte Spill vmovups %ymm14, 416(%rsp) ## 32-byte Spill vmovdqa %xmm7, 528(%rsp) ## 16-byte Spill vmovaps %xmm1, 800(%rsp) ## 16-byte Spill jle LBB8_62 ## BB#65: ## %foreach_full_body131.lr.ph ## in Loop: Header=BB8_61 Depth=1 movl %edx, %r13d imull -112(%rsp), %r13d ## 4-byte Folded Reload vmovd %r13d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 864(%rsp) ## 32-byte Spill vpermilps $0, %xmm1, 
%xmm0 ## xmm0 = xmm1[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 832(%rsp) ## 32-byte Spill movl %edi, %r15d .p2align 4, 0x90 LBB8_66: ## %foreach_full_body131 ## Parent Loop BB8_61 Depth=1 ## => This Loop Header: Depth=2 ## Child Loop BB8_73 Depth 3 vmovd %r15d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI8_3(%rip), %xmm0, %xmm1 vpaddd LCPI8_4(%rip), %xmm0, %xmm2 vmovdqu 864(%rsp), %ymm0 ## 32-byte Reload vmovdqu %ymm1, -64(%rsp) ## 32-byte Spill vpaddd %xmm0, %xmm1, %xmm10 vextractf128 $1, %ymm0, %xmm0 vmovdqa %xmm2, 192(%rsp) ## 16-byte Spill vpaddd %xmm0, %xmm2, %xmm1 vpslld $2, %xmm10, %xmm0 vpmovsxdq %xmm0, %xmm2 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm0 vmovq (%r11), %xmm3 ## xmm3 = mem[0],zero vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm0 vmovdqa %xmm3, 384(%rsp) ## 16-byte Spill vpaddq %xmm2, %xmm3, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovss (%rax), %xmm0 ## xmm0 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm0, %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] vinsertps $32, (%rdx), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] vinsertps $48, (%rsi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0] vmovups %ymm0, 608(%rsp) ## 32-byte Spill vmovdqa %xmm1, -32(%rsp) ## 16-byte Spill vpaddd %xmm1, %xmm1, %xmm0 vpaddd %xmm10, %xmm10, %xmm2 vpmovsxdq %xmm2, %xmm4 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm6 vpmovsxdq %xmm0, %xmm7 vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1] vpmovsxdq %xmm0, %xmm5 vmovq 8(%r11), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm2 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbp vmovq %xmm2, %rbx vpextrq $1, %xmm2, %r10 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbp), %xmm0, %xmm0 vpinsrw $6, (%rbx), %xmm0, %xmm0 vpinsrw $7, (%r10), %xmm0, %xmm0 vpunpckhwd %xmm8, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm8[4],xmm0[5],xmm8[5],xmm0[6],xmm8[6],xmm0[7],xmm8[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vpxor %xmm12, %xmm12, %xmm12 vinsertf128 $1, %xmm1, %ymm0, %ymm11 vmovaps LCPI8_8(%rip), %ymm9 ## ymm9 = [32767,32767,32767,32767,32767,32767,32767,32767] vandps %ymm9, %ymm11, %ymm8 vmovq 16(%r11), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm3 vpaddq %xmm6, %xmm0, %xmm2 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovq %xmm3, %rdi vpextrq $1, %xmm3, %rbp vmovq %xmm1, %rbx vpextrq $1, %xmm1, %r10 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbp), %xmm0, %xmm0 vpinsrw $6, (%rbx), %xmm0, %xmm0 vpinsrw $7, (%r10), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm15 vmovq 24(%r11), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 
## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm4, %xmm0, %xmm2 vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm0 vmovq %xmm0, %rdx vpextrq $1, %xmm0, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbp vmovq %xmm1, %rbx vpextrq $1, %xmm1, %r10 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbp), %xmm0, %xmm0 vpinsrw $6, (%rbx), %xmm0, %xmm0 vpinsrw $7, (%r10), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovq 32(%r11), %xmm0 ## xmm0 = mem[0],zero vpshufd $68, %xmm0, %xmm0 ## xmm0 = xmm0[0,1,0,1] vpaddq %xmm5, %xmm0, %xmm1 vpaddq %xmm7, %xmm0, %xmm2 vpaddq %xmm6, %xmm0, %xmm3 vpaddq %xmm4, %xmm0, %xmm0 vmovq %xmm0, %rax vpextrq $1, %xmm0, %rcx vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vmovq %xmm2, %rdi vpextrq $1, %xmm2, %rbp vmovq %xmm1, %rbx vpextrq $1, %xmm1, %r10 movzwl (%rax), %eax vmovd %eax, %xmm0 vpinsrw $1, (%rcx), %xmm0, %xmm0 vpinsrw $2, (%rdx), %xmm0, %xmm0 vpinsrw $3, (%rsi), %xmm0, %xmm0 vpinsrw $4, (%rdi), %xmm0, %xmm0 vpinsrw $5, (%rbp), %xmm0, %xmm0 vpinsrw $6, (%rbx), %xmm0, %xmm0 vpinsrw $7, (%r10), %xmm0, %xmm0 vpunpckhwd %xmm12, %xmm0, %xmm1 ## xmm1 = xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] vpmovzxwd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero vinsertf128 $1, %xmm1, %ymm0, %ymm0 vpmovsxdq %xmm10, %xmm3 vpshufd $78, %xmm10, %xmm1 ## xmm1 = xmm10[2,3,0,1] vpmovsxdq %xmm1, %xmm1 vmovq 40(%r11), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm4 ## xmm4 = xmm2[0,1,0,1] vpaddq %xmm1, %xmm4, %xmm2 vmovdqa %xmm4, 160(%rsp) ## 16-byte Spill vpaddq %xmm3, %xmm4, %xmm4 vmovq %xmm4, %rax vpextrq $1, %xmm4, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vpinsrb $0, (%rax), %xmm0, %xmm2 vpinsrb $1, (%rcx), %xmm2, %xmm2 vmovq 48(%r11), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm4 vmovq %xmm4, %r10 vpextrq $1, %xmm4, %rcx vmovdqa %xmm5, 128(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm4 vmovq %xmm4, %rdi vpextrq $1, %xmm4, %rbp vmovq 56(%r11), %xmm4 ## xmm4 = mem[0],zero vpshufd $68, %xmm4, %xmm5 ## xmm5 = xmm4[0,1,0,1] vpaddq %xmm3, %xmm5, %xmm3 vmovq %xmm3, %rbx vpextrq $1, %xmm3, %rax vpslld $13, %xmm8, %xmm3 vextractf128 $1, %ymm8, %xmm4 vpslld $13, %xmm4, %xmm14 vpinsrb $2, (%rdx), %xmm2, %xmm2 vpinsrb $3, (%rsi), %xmm2, %xmm6 vmovdqa %xmm5, 352(%rsp) ## 16-byte Spill vpaddq %xmm1, %xmm5, %xmm1 vmovq %xmm1, %rdx vpextrq $1, %xmm1, %rsi vinsertf128 $1, %xmm14, %ymm3, %ymm12 vmovdqa LCPI8_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vpaddd %xmm5, %xmm3, %xmm1 vmovdqu %ymm1, 768(%rsp) ## 32-byte Spill vmovdqa LCPI8_12(%rip), %xmm13 ## xmm13 = [947912704,947912704,947912704,947912704] vmovdqa %xmm13, %xmm4 vpaddd %xmm4, %xmm3, %xmm1 vmovdqu %ymm1, 736(%rsp) ## 32-byte Spill vmovaps LCPI8_14(%rip), %ymm2 ## ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768] vmovaps %ymm2, %ymm13 vandps %ymm13, %ymm11, %ymm1 vandps %ymm9, %ymm15, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm10 vinsertf128 $1, %xmm10, %ymm3, %ymm7 vpaddd %xmm5, %xmm3, %xmm2 vmovdqu %ymm2, 1056(%rsp) ## 
32-byte Spill vpaddd %xmm4, %xmm3, %xmm2 vmovdqu %ymm2, 1088(%rsp) ## 32-byte Spill vandps %ymm13, %ymm15, %ymm15 vandps %ymm9, %ymm0, %ymm3 vpslld $13, %xmm3, %xmm8 vextractf128 $1, %ymm3, %xmm3 vpslld $13, %xmm3, %xmm2 vmovdqa %xmm2, 1120(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm8, %ymm2 vpaddd %xmm5, %xmm8, %xmm3 vmovdqu %ymm3, 1184(%rsp) ## 32-byte Spill vpaddd %xmm4, %xmm8, %xmm3 vmovdqu %ymm3, 1216(%rsp) ## 32-byte Spill vandps %ymm13, %ymm0, %ymm4 vpmovzxbd %xmm6, %xmm0 ## xmm0 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovdqu %ymm0, 288(%rsp) ## 32-byte Spill vpinsrb $0, (%r10), %xmm0, %xmm0 vpinsrb $1, (%rcx), %xmm0, %xmm0 vpinsrb $2, (%rdi), %xmm0, %xmm0 vpinsrb $3, (%rbp), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 320(%rsp) ## 32-byte Spill vpinsrb $0, (%rbx), %xmm0, %xmm0 vpinsrb $1, (%rax), %xmm0, %xmm0 vpinsrb $2, (%rdx), %xmm0, %xmm0 vpinsrb $3, (%rsi), %xmm0, %xmm0 vpmovzxbd %xmm0, %xmm0 ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero vmovdqu %ymm0, 576(%rsp) ## 32-byte Spill testl %r8d, %r8d vmovaps LCPI8_9(%rip), %ymm0 ## ymm0 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848] vandps %ymm0, %ymm12, %ymm11 vpxor %xmm3, %xmm3, %xmm3 vpcmpeqd %xmm3, %xmm11, %xmm6 vmovdqu %ymm6, 960(%rsp) ## 32-byte Spill vmovups %ymm1, 672(%rsp) ## 32-byte Spill vpslld $16, %xmm1, %xmm1 vmovdqa LCPI8_15(%rip), %xmm13 ## xmm13 = [260046848,260046848,260046848,260046848] vpcmpeqd %xmm13, %xmm11, %xmm6 vmovdqu %ymm6, 992(%rsp) ## 32-byte Spill vmovups %ymm7, 1152(%rsp) ## 32-byte Spill vandps %ymm0, %ymm7, %ymm6 vpcmpeqd %xmm3, %xmm6, %xmm3 vmovdqu %ymm3, 1024(%rsp) ## 32-byte Spill vpxor %xmm8, %xmm8, %xmm8 vmovups %ymm15, 704(%rsp) ## 32-byte Spill vpslld $16, %xmm15, %xmm3 vmovdqu %ymm3, 1280(%rsp) ## 32-byte Spill vpcmpeqd %xmm13, %xmm6, %xmm7 vandps 64(%rsp), %ymm9, %ymm15 ## 32-byte Folded Reload vpslld $13, %xmm15, %xmm9 vmovups %ymm2, 1248(%rsp) ## 32-byte Spill vandps %ymm0, %ymm2, %ymm3 vpcmpeqd %xmm8, %xmm3, %xmm8 vmovups %ymm4, 256(%rsp) ## 32-byte Spill vpslld $16, %xmm4, %xmm0 vpcmpeqd %xmm13, %xmm3, %xmm2 jle LBB8_67 ## BB#72: ## %cif_mask_all.lr.ph ## in Loop: Header=BB8_66 Depth=2 vmovdqa %xmm5, %xmm13 vpaddd %xmm13, %xmm14, %xmm5 vmovdqu %ymm1, 928(%rsp) ## 32-byte Spill vmovups 768(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm1, %ymm1 vmovdqu %ymm9, 896(%rsp) ## 32-byte Spill vmovdqa LCPI8_12(%rip), %xmm9 ## xmm9 = [947912704,947912704,947912704,947912704] vpaddd %xmm9, %xmm14, %xmm5 vmovups 736(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm4, %ymm4 vextractf128 $1, %ymm11, %xmm5 vmovdqa %ymm8, %ymm9 vpxor %xmm8, %xmm8, %xmm8 vmovdqu %ymm0, 768(%rsp) ## 32-byte Spill vpcmpeqd %xmm8, %xmm5, %xmm0 vmovups 960(%rsp), %ymm8 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm8, %ymm0 vmovaps LCPI8_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05] vaddps %ymm14, %ymm4, %ymm4 vblendvps %ymm0, %ymm4, %ymm1, %ymm0 vmovdqa LCPI8_15(%rip), %xmm8 ## xmm8 = [260046848,260046848,260046848,260046848] vpcmpeqd %xmm8, %xmm5, %xmm1 vmovups 992(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vmovaps LCPI8_11(%rip), %ymm11 ## ymm11 = 
[1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192] vorps %ymm11, %ymm12, %ymm4 vblendvps %ymm1, %ymm4, %ymm0, %ymm0 vmovups %ymm0, 736(%rsp) ## 32-byte Spill vpaddd %xmm13, %xmm10, %xmm0 vmovups 1056(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vmovdqa LCPI8_12(%rip), %xmm12 ## xmm12 = [947912704,947912704,947912704,947912704] vpaddd %xmm12, %xmm10, %xmm1 vmovups 1088(%rsp), %ymm4 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm4, %ymm1 vextractf128 $1, %ymm6, %xmm4 vpxor %xmm6, %xmm6, %xmm6 vpcmpeqd %xmm6, %xmm4, %xmm5 vmovups 1024(%rsp), %ymm10 ## 32-byte Reload vinsertf128 $1, %xmm5, %ymm10, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm8, %xmm4, %xmm1 vinsertf128 $1, %xmm1, %ymm7, %ymm1 vmovdqa %ymm2, %ymm5 vorps 1152(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm0, %ymm10 vmovdqa 1120(%rsp), %xmm2 ## 16-byte Reload vpaddd %xmm13, %xmm2, %xmm0 vmovups 1184(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vpaddd %xmm12, %xmm2, %xmm1 vmovdqa %xmm12, %xmm7 vmovups 1216(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vextractf128 $1, %ymm3, %xmm2 vpcmpeqd %xmm6, %xmm2, %xmm3 vpxor %xmm6, %xmm6, %xmm6 vinsertf128 $1, %xmm3, %ymm9, %ymm3 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm3, %ymm1, %ymm0, %ymm0 vpcmpeqd %xmm8, %xmm2, %xmm1 vinsertf128 $1, %xmm1, %ymm5, %ymm1 vorps 1248(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm0, %ymm12 vextractf128 $1, %ymm15, %xmm0 vpslld $13, %xmm0, %xmm0 vmovups 896(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vextractf128 $1, %ymm0, %xmm1 vpaddd %xmm13, %xmm1, %xmm2 vpaddd %xmm13, %xmm0, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovdqa %xmm7, %xmm3 vpaddd %xmm3, %xmm1, %xmm1 vpaddd %xmm3, %xmm0, %xmm3 vinsertf128 $1, %xmm1, %ymm3, %ymm1 vandps LCPI8_9(%rip), %ymm0, %ymm3 vextractf128 $1, %ymm3, %xmm4 vpcmpeqd %xmm6, %xmm4, %xmm5 vpcmpeqd %xmm6, %xmm3, %xmm6 vinsertf128 $1, %xmm5, %ymm6, %ymm5 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm5, %ymm1, %ymm2, %ymm1 vpcmpeqd %xmm8, %xmm4, %xmm2 vpcmpeqd %xmm8, %xmm3, %xmm3 vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm11, %ymm0, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm14 vmovups -64(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, 192(%rsp), %ymm0, %ymm1 ## 16-byte Folded Reload vmovdqa -32(%rsp), %xmm0 ## 16-byte Reload vpslld $2, %xmm0, %xmm2 vpmovsxdq %xmm2, %xmm3 vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1] vpmovsxdq %xmm2, %xmm2 vmovdqa 384(%rsp), %xmm4 ## 16-byte Reload vpaddq %xmm2, %xmm4, %xmm2 vpaddq %xmm3, %xmm4, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vmovq %xmm2, %rdx vpextrq $1, %xmm2, %rsi vmovss (%rax), %xmm2 ## xmm2 = mem[0],zero,zero,zero vinsertps $16, (%rcx), %xmm2, %xmm2 ## xmm2 = xmm2[0],mem[0],xmm2[2,3] vinsertps $32, (%rdx), %xmm2, %xmm2 ## xmm2 = xmm2[0,1],mem[0],xmm2[3] vinsertps $48, (%rsi), %xmm2, %xmm2 ## xmm2 = xmm2[0,1,2],mem[0] vmovups 608(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vcvtdq2ps %ymm1, %ymm1 vmovaps LCPI8_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vaddps %ymm5, %ymm1, %ymm1 vmulps 1920(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmovaps LCPI8_6(%rip), %ymm3 ## ymm3 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] vmovaps %ymm3, %ymm7 vaddps %ymm7, %ymm1, %ymm1 vsubps 1856(%rsp), 
%ymm2, %ymm2 ## 32-byte Folded Reload vmovups 1888(%rsp), %ymm3 ## 32-byte Reload vdivps %ymm2, %ymm3, %ymm13 vmulps %ymm13, %ymm1, %ymm1 vdivps 1824(%rsp), %ymm1, %ymm4 ## 32-byte Folded Reload vmulps 832(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload vdivps 1792(%rsp), %ymm1, %ymm11 ## 32-byte Folded Reload vmulps %ymm4, %ymm4, %ymm1 vmulps %ymm11, %ymm11, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmulps %ymm13, %ymm13, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vrsqrtps %ymm1, %ymm2 vmulps %ymm1, %ymm2, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovaps LCPI8_7(%rip), %ymm9 ## ymm9 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm1, %ymm9, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vmovups 672(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 928(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps 736(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmovups 704(%rsp), %ymm3 ## 32-byte Reload vextractf128 $1, %ymm3, %xmm3 vpslld $16, %xmm3, %xmm3 vmovups 1280(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 vorps %ymm10, %ymm3, %ymm3 vmovaps %ymm4, %ymm8 vmulps %ymm2, %ymm2, %ymm4 vsubps %ymm4, %ymm2, %ymm4 vmulps %ymm3, %ymm3, %ymm6 vsubps %ymm6, %ymm3, %ymm6 vaddps %ymm6, %ymm4, %ymm4 vmovaps LCPI8_16(%rip), %ymm6 ## ymm6 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00] vmovaps %ymm6, %ymm10 vmulps %ymm10, %ymm4, %ymm6 vaddps %ymm7, %ymm6, %ymm6 vsqrtps %ymm6, %ymm6 vmulps %ymm10, %ymm2, %ymm2 vmovaps LCPI8_17(%rip), %ymm7 ## ymm7 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00] vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm6, %ymm2, %ymm2 vmovups %ymm2, -64(%rsp) ## 32-byte Spill vmulps %ymm10, %ymm3, %ymm2 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm2, %ymm6, %ymm2 vmovups %ymm2, 192(%rsp) ## 32-byte Spill vmovups 256(%rsp), %ymm2 ## 32-byte Reload vextractf128 $1, %ymm2, %xmm2 vpslld $16, %xmm2, %xmm2 vmovups 768(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vorps %ymm12, %ymm2, %ymm10 vpmovsxdq %xmm0, %xmm2 vmovdqa 160(%rsp), %xmm6 ## 16-byte Reload vpaddq %xmm2, %xmm6, %xmm3 vmovq %xmm3, %rax vpextrq $1, %xmm3, %rcx vpshufd $78, %xmm0, %xmm3 ## xmm3 = xmm0[2,3,0,1] vpmovsxdq %xmm3, %xmm3 vpaddq %xmm3, %xmm6, %xmm6 vmovq %xmm6, %rdx vpextrq $1, %xmm6, %rsi vpinsrb $0, (%rax), %xmm0, %xmm6 vpinsrb $1, (%rcx), %xmm6, %xmm6 vpinsrb $2, (%rdx), %xmm6, %xmm6 vpinsrb $3, (%rsi), %xmm6, %xmm6 vpmovzxbd %xmm6, %xmm6 ## xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero vmovups 288(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm0, %ymm6 vmovdqa 128(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm2, %xmm0, %xmm7 vmovq %xmm7, %rax vpextrq $1, %xmm7, %rcx vpaddq %xmm3, %xmm0, %xmm7 vmovq %xmm7, %rdx vpextrq $1, %xmm7, %rsi vpinsrb $0, (%rax), %xmm0, %xmm7 vpinsrb $1, (%rcx), %xmm7, %xmm7 vpinsrb $2, (%rdx), %xmm7, %xmm7 vpinsrb $3, (%rsi), %xmm7, %xmm7 vpmovzxbd %xmm7, %xmm7 ## xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero vmovups 320(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm0, %ymm7 vmulps %ymm5, %ymm1, %ymm1 vmovdqa 352(%rsp), %xmm0 ## 16-byte Reload vpaddq %xmm3, %xmm0, %xmm3 vpaddq %xmm2, %xmm0, %xmm2 vmulps %ymm1, %ymm8, %ymm5 vmovups %ymm5, 128(%rsp) ## 32-byte Spill vmovq %xmm2, %rax vpextrq $1, %xmm2, %rcx vmulps %ymm1, %ymm11, 
%ymm2 vmovups %ymm2, 352(%rsp) ## 32-byte Spill vmulps %ymm1, %ymm13, %ymm1 vmovups %ymm1, 320(%rsp) ## 32-byte Spill vmulps LCPI8_18(%rip), %ymm4, %ymm1 vsubps %ymm1, %ymm9, %ymm9 vcvtdq2ps %ymm6, %ymm1 vmovaps LCPI8_19(%rip), %ymm2 ## ymm2 = [3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03] vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 288(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 256(%rsp) ## 32-byte Spill vmovq %xmm3, %rdx vpextrq $1, %xmm3, %rsi vpinsrb $0, (%rax), %xmm0, %xmm1 vpinsrb $1, (%rcx), %xmm1, %xmm1 vpinsrb $2, (%rdx), %xmm1, %xmm1 vpinsrb $3, (%rsi), %xmm1, %xmm1 vpmovzxbd %xmm1, %xmm1 ## xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero vmovups 576(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm0, %ymm1 vcvtdq2ps %ymm1, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 576(%rsp) ## 32-byte Spill vmovaps LCPI8_14(%rip), %ymm0 ## ymm0 = [32768,32768,32768,32768,32768,32768,32768,32768] vandps 64(%rsp), %ymm0, %ymm1 ## 32-byte Folded Reload vpslld $16, %xmm1, %xmm2 vextractf128 $1, %ymm1, %xmm1 vpslld $16, %xmm1, %xmm1 vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps %ymm14, %ymm1, %ymm0 movq 64(%r11), %r10 movq 72(%r11), %r14 movq 80(%r11), %r12 movq 120(%r11), %rdi vmovups %ymm10, 160(%rsp) ## 32-byte Spill vaddps LCPI8_20(%rip), %ymm10, %ymm1 vmulps LCPI8_21(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vmovups %ymm0, 672(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovaps %ymm11, %ymm0 movl %r8d, %ecx movq 2664(%rsp), %rbx vxorps %ymm1, %ymm1, %ymm1 vmovups %ymm1, -32(%rsp) ## 32-byte Spill vxorps %ymm10, %ymm10, %ymm10 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm13, 704(%rsp) ## 32-byte Spill vmovups %ymm8, 384(%rsp) ## 32-byte Spill vmovups %ymm0, 608(%rsp) ## 32-byte Spill .p2align 4, 0x90 LBB8_73: ## %cif_mask_all ## Parent Loop BB8_61 Depth=1 ## Parent Loop BB8_66 Depth=2 ## => This Inner Loop Header: Depth=3 movslq (%rbx), %rax vmovss (%rdi,%rax,4), %xmm4 ## xmm4 = mem[0],zero,zero,zero vbroadcastss (%r10,%rax,4), %ymm1 vsubps %ymm8, %ymm1, %ymm6 vbroadcastss (%r14,%rax,4), %ymm1 vsubps %ymm0, %ymm1, %ymm1 vbroadcastss (%r12,%rax,4), %ymm2 vsubps %ymm13, %ymm2, %ymm3 vmulps %ymm6, %ymm6, %ymm2 vmulps %ymm1, %ymm1, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm2, %ymm2 vmulss %xmm4, %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vcmpnleps %ymm2, %ymm7, %ymm7 vmovmskps %ymm7, %esi cmpl $255, %esi je LBB8_77 ## BB#74: ## %cif_mask_all ## in Loop: Header=BB8_73 Depth=3 testl %esi, %esi je LBB8_75 ## BB#82: ## %cif_test_mixed706 ## in Loop: Header=BB8_73 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm11 vmulps %ymm11, %ymm2, %ymm12 vmovaps LCPI8_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm12, %ymm5, %ymm12 vmulps %ymm12, %ymm11, %ymm11 vmulps %ymm11, %ymm6, %ymm12 vblendvps %ymm7, %ymm12, %ymm6, %ymm12 vmulps %ymm11, %ymm1, %ymm6 vblendvps %ymm7, %ymm6, %ymm1, %ymm1 vmulps %ymm11, %ymm3, %ymm6 vblendvps %ymm7, %ymm6, %ymm3, %ymm3 vmovups -64(%rsp), %ymm5 ## 32-byte Reload vmulps %ymm12, %ymm5, %ymm6 vmovaps %ymm9, %ymm14 vmovups 192(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm1, %ymm9, %ymm11 vaddps %ymm11, %ymm6, %ymm6 vmulps %ymm3, %ymm14, %ymm11 vaddps %ymm11, %ymm6, %ymm11 vcmpnleps %ymm15, 
%ymm11, %ymm6 vblendvps %ymm7, %ymm6, %ymm15, %ymm6 vmovmskps %ymm6, %edx testl %edx, %edx je LBB8_83 ## BB#84: ## %safe_if_run_true1007 ## in Loop: Header=BB8_73 Depth=3 movq 88(%r11), %rdx vsubss (%rdx,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm4 vsubps 128(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 320(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vrsqrtps %ymm7, %ymm12 vmulps %ymm12, %ymm7, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps %ymm5, %ymm0 vmovaps LCPI8_7(%rip), %ymm5 ## ymm5 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI8_5(%rip), %ymm5 ## ymm5 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vxorps %ymm13, %ymm13, %ymm13 vmovaps %ymm5, %ymm15 vmulps %ymm15, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm12 vblendvps %ymm6, %ymm12, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm12 vblendvps %ymm6, %ymm12, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm7 vblendvps %ymm6, %ymm7, %ymm3, %ymm3 vmulps %ymm2, %ymm0, %ymm2 vmulps %ymm1, %ymm9, %ymm1 vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm14, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vmaxps %ymm13, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm13, %ymm1 vcmpnltps %ymm3, %ymm13, %ymm2 vmovups 704(%rsp), %ymm13 ## 32-byte Reload vmovaps LCPI8_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vmovdqa LCPI8_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm5, %xmm5 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vmovups 2016(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm6, %ymm5, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm3, %ymm3 vorps %ymm15, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups 2048(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm6, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 2048(%rsp) ## 32-byte Spill vsubps %ymm0, %ymm8, %ymm3 vmulps LCPI8_25(%rip), %ymm3, %ymm5 vaddps LCPI8_26(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_27(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_28(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_29(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_30(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_31(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_32(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_33(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps %ymm8, %ymm5, %ymm5 vsubps %ymm3, %ymm15, %ymm3 vmulps %ymm5, %ymm3, %ymm3 vmovups %ymm7, 2016(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm5 vmulps LCPI8_34(%rip), %ymm5, %ymm5 vaddps %ymm3, %ymm5, %ymm3 vmovaps LCPI8_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 160(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 
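## NOTE: the ladder above (vpsrad $23 exponent split plus the
## LCPI8_25..LCPI8_33 Horner polynomial) looks like a vectorized log(),
## and the sequence continuing below is the matching exp(): scale by
## log2(e) (LCPI8_37 = 1.442695), split off the integer part with
## vroundps $9 (round toward -inf), subtract n*ln2 in two Cody-Waite
## pieces (LCPI8_38 = 6.931458e-01, LCPI8_39 = 1.428607e-06), run the
## fraction through the LCPI8_40..LCPI8_45 polynomial, then apply 2^n by
## building the float exponent field directly (bias 127 = LCPI8_46,
## vpslld $23), with overflow/underflow patched by the LCPI8_48/LCPI8_47
## blends. A minimal scalar C sketch of the exp step, assuming this
## reading (exp_poly is a hypothetical stand-in for the LCPI8_40..45
## ladder, not a symbol from this file):
##     float exp_approx(float y) {
##         float n = floorf(y * 1.442695f);               // vroundps $9
##         float r = y - n*6.931458e-1f - n*1.428607e-6f; // Cody-Waite ln2
##         return ldexpf(1.0f + exp_poly(r), (int)n);     // vpslld $23
##     }
## Together with the log above, this evaluates pow(x, e) as exp(e*log(x)).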
vcvttps2dq %ymm2, %ymm3 vmulps LCPI8_38(%rip), %ymm2, %ymm5 vsubps %ymm5, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI8_40(%rip), %ymm1, %ymm2 vaddps LCPI8_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI8_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm5 vpaddd %xmm0, %xmm5, %xmm7 vpcmpgtd %xmm0, %xmm5, %xmm5 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm5, %ymm3, %ymm3 vmovdqa LCPI8_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm5 vpcmpgtd %xmm2, %xmm0, %xmm12 vinsertf128 $1, %xmm5, %ymm12, %ymm5 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI8_48(%rip), %ymm1, %ymm1 vblendvps %ymm5, %ymm15, %ymm1, %ymm1 vminps %ymm8, %ymm4, %ymm2 vmulps %ymm2, %ymm11, %ymm2 movq 96(%r11), %rdx vmulps 672(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmovups 384(%rsp), %ymm8 ## 32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 104(%r11), %rsi movq 112(%r11), %rbp vbroadcastss (%rdx,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm2 vblendvps %ymm6, %ymm2, %ymm10, %ymm10 vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 256(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -32(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 576(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm1 vblendvps %ymm6, %ymm1, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovups 608(%rsp), %ymm0 ## 32-byte Reload vmovaps %ymm14, %ymm9 jmp LBB8_75 .p2align 4, 0x90 LBB8_77: ## %cif_test_all ## in Loop: Header=BB8_73 Depth=3 vsqrtps %ymm2, %ymm2 vrcpps %ymm2, %ymm7 vmulps %ymm7, %ymm2, %ymm11 vmovaps LCPI8_20(%rip), %ymm5 ## ymm5 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm11, %ymm5, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm6, %ymm12 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -64(%rsp), %ymm12, %ymm6 ## 32-byte Folded Reload vmulps 192(%rsp), %ymm1, %ymm7 ## 32-byte Folded Reload vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm9, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vcmpnleps %ymm15, %ymm6, %ymm11 vmovmskps %ymm11, %esi testl %esi, %esi je LBB8_75 ## BB#78: ## %cif_test_all ## in Loop: Header=BB8_73 Depth=3 cmpl $255, %esi jne LBB8_81 ## BB#79: ## %cif_test_all379 ## in Loop: Header=BB8_73 Depth=3 movq 88(%r11), %rsi vsubss (%rsi,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm15 vsubps 128(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 320(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, 
%ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm11 vaddps %ymm11, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm11 vaddps %ymm7, %ymm11, %ymm7 vrsqrtps %ymm7, %ymm11 vmulps %ymm11, %ymm7, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI8_7(%rip), %ymm4 ## ymm4 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm4, %ymm7 vmulps %ymm7, %ymm11, %ymm7 vmovaps LCPI8_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm4, %ymm5 vmulps %ymm5, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm3 vmulps -64(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps 192(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm9, %ymm2 vaddps %ymm1, %ymm2, %ymm1 vxorps %ymm12, %ymm12, %ymm12 vmaxps %ymm12, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm12, %ymm1 vcmpnltps %ymm3, %ymm12, %ymm2 vmovaps LCPI8_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI8_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm4, %xmm4 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vandps LCPI8_24(%rip), %ymm3, %ymm3 vorps %ymm5, %ymm3, %ymm3 vsubps %ymm3, %ymm8, %ymm3 vmulps LCPI8_25(%rip), %ymm3, %ymm7 vaddps LCPI8_26(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_27(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_28(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_29(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_30(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_31(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_32(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps LCPI8_33(%rip), %ymm7, %ymm7 vmulps %ymm7, %ymm3, %ymm7 vaddps %ymm8, %ymm7, %ymm7 vsubps %ymm3, %ymm12, %ymm3 vmulps %ymm7, %ymm3, %ymm3 vcvtdq2ps %ymm4, %ymm4 vmulps LCPI8_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI8_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 160(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI8_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI8_40(%rip), %ymm1, %ymm2 vaddps LCPI8_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI8_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm0, %xmm4, %xmm7 vpcmpgtd %xmm0, %xmm4, %xmm4 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovdqa LCPI8_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm4 vpcmpgtd %xmm2, %xmm0, %xmm11 vinsertf128 $1, %xmm4, %ymm11, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, 
%ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI8_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm12, %ymm1, %ymm1 vminps %ymm8, %ymm15, %ymm2 vxorps %ymm15, %ymm15, %ymm15 vmulps %ymm2, %ymm6, %ymm2 movq 96(%r11), %rsi vmulps 672(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 movq 104(%r11), %rbp movq 112(%r11), %rdx vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm10 vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 256(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -32(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vbroadcastss (%rdx,%rax,4), %ymm2 vmulps 576(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm0 jmp LBB8_80 LBB8_83: ## in Loop: Header=BB8_73 Depth=3 vmovaps %ymm14, %ymm9 jmp LBB8_75 LBB8_81: ## %cif_test_mixed ## in Loop: Header=BB8_73 Depth=3 movq 88(%r11), %rdx vsubss (%rdx,%rax,4), %xmm4, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm4, %xmm4 ## xmm4 = xmm4[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vsubps %ymm2, %ymm4, %ymm2 vdivps %ymm7, %ymm2, %ymm15 vsubps 128(%rsp), %ymm12, %ymm2 ## 32-byte Folded Reload vsubps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vsubps 320(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm7 vmulps %ymm1, %ymm1, %ymm12 vaddps %ymm12, %ymm7, %ymm7 vmulps %ymm3, %ymm3, %ymm12 vaddps %ymm7, %ymm12, %ymm7 vrsqrtps %ymm7, %ymm12 vmulps %ymm12, %ymm7, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI8_7(%rip), %ymm4 ## ymm4 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm7, %ymm4, %ymm7 vmulps %ymm7, %ymm12, %ymm7 vmovaps LCPI8_5(%rip), %ymm4 ## ymm4 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm4, %ymm5 vmulps %ymm5, %ymm7, %ymm7 vmulps %ymm7, %ymm2, %ymm12 vblendvps %ymm11, %ymm12, %ymm2, %ymm2 vmulps %ymm7, %ymm1, %ymm12 vblendvps %ymm11, %ymm12, %ymm1, %ymm1 vmulps %ymm7, %ymm3, %ymm7 vblendvps %ymm11, %ymm7, %ymm3, %ymm3 vmulps -64(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps 192(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps %ymm3, %ymm9, %ymm2 vaddps %ymm2, %ymm1, %ymm1 vxorps %ymm13, %ymm13, %ymm13 vmaxps %ymm13, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm13, %ymm1 vcmpnltps %ymm3, %ymm13, %ymm2 vmovaps LCPI8_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm7 vextractf128 $1, %ymm3, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI8_23(%rip), %xmm0 ## xmm0 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm0, %xmm4, %xmm4 vpaddd %xmm0, %xmm7, %xmm7 vinsertf128 $1, %xmm4, %ymm7, %ymm4 vmovups 1952(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm11, %ymm4, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm3, %ymm3 vorps %ymm5, %ymm3, %ymm3 vmovups 1984(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm11, %ymm3, %ymm0, %ymm0 vmovups %ymm0, 1984(%rsp) ## 32-byte Spill vsubps %ymm0, %ymm8, %ymm3 vmulps LCPI8_25(%rip), %ymm3, %ymm4 vaddps LCPI8_26(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_27(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, 
%ymm4 vaddps LCPI8_28(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_29(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_30(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_31(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_32(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_33(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm8, %ymm4, %ymm4 vsubps %ymm3, %ymm13, %ymm3 vmulps %ymm4, %ymm3, %ymm3 vmovups %ymm7, 1952(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm4 vmulps LCPI8_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI8_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm0, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps 160(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI8_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI8_40(%rip), %ymm1, %ymm2 vaddps LCPI8_41(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_42(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_43(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_44(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps LCPI8_45(%rip), %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vmovdqa LCPI8_46(%rip), %xmm0 ## xmm0 = [127,127,127,127] vpaddd %xmm0, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm0, %xmm4, %xmm7 vpcmpgtd %xmm0, %xmm4, %xmm4 vpcmpgtd %xmm0, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovdqa LCPI8_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm7, %xmm0, %xmm4 vpcmpgtd %xmm2, %xmm0, %xmm12 vinsertf128 $1, %xmm4, %ymm12, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm2, %ymm2 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI8_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm13, %ymm1, %ymm1 vmovups 704(%rsp), %ymm13 ## 32-byte Reload vminps %ymm8, %ymm15, %ymm2 vxorps %ymm15, %ymm15, %ymm15 vmulps %ymm2, %ymm6, %ymm2 movq 96(%r11), %rdx vmulps 672(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm2, %ymm1 movq 104(%r11), %rsi movq 112(%r11), %rbp vbroadcastss (%rdx,%rax,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm2, %ymm10, %ymm2 vblendvps %ymm11, %ymm2, %ymm10, %ymm10 vbroadcastss (%rsi,%rax,4), %ymm2 vmulps 256(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -32(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm2, %ymm0, %ymm2 vblendvps %ymm11, %ymm2, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vbroadcastss (%rbp,%rax,4), %ymm2 vmulps 576(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vaddps %ymm1, %ymm0, %ymm1 vblendvps %ymm11, %ymm1, %ymm0, %ymm0 LBB8_80: ## %cif_done ## in Loop: Header=BB8_73 Depth=3 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovups 384(%rsp), %ymm8 ## 32-byte Reload vmovups 608(%rsp), %ymm0 ## 32-byte Reload LBB8_75: ## %cif_done ## in Loop: Header=BB8_73 Depth=3 addq $4, %rbx decl %ecx jne LBB8_73 jmp LBB8_76 .p2align 4, 0x90 LBB8_67: ## in Loop: Header=BB8_66 Depth=2 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vxorps %ymm10, %ymm10, %ymm10 vxorps %ymm15, %ymm15, 
%ymm15 LBB8_76: ## %for_exit293 ## in Loop: Header=BB8_66 Depth=2 vmaxps %ymm15, %ymm10, %ymm0 vmovaps LCPI8_22(%rip), %ymm8 ## ymm8 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vminps %ymm8, %ymm0, %ymm2 vcmpnleps %ymm2, %ymm15, %ymm0 vcmpnltps %ymm2, %ymm15, %ymm1 vblendvps %ymm1, %ymm8, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI8_23(%rip), %xmm9 ## xmm9 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm9, %xmm4, %xmm4 vpaddd %xmm9, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vmovaps LCPI8_24(%rip), %ymm10 ## ymm10 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm10, %ymm2, %ymm2 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm7, %ymm2, %ymm2 vsubps %ymm2, %ymm8, %ymm2 vmovaps LCPI8_25(%rip), %ymm11 ## ymm11 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00] vmulps %ymm11, %ymm2, %ymm4 vmovaps LCPI8_26(%rip), %ymm12 ## ymm12 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00] vaddps %ymm12, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_27(%rip), %ymm13 ## ymm13 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00] vaddps %ymm13, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_28(%rip), %ymm7 ## ymm7 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01] vaddps %ymm7, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_29(%rip), %ymm10 ## ymm10 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01] vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_30(%rip), %ymm11 ## ymm11 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01] vaddps %ymm11, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_31(%rip), %ymm5 ## ymm5 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_32(%rip), %ymm5 ## ymm5 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vmovaps LCPI8_33(%rip), %ymm5 ## ymm5 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01] vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm4, %ymm2, %ymm4 vaddps %ymm8, %ymm4, %ymm4 vsubps %ymm2, %ymm15, %ymm2 vmulps %ymm4, %ymm2, %ymm2 vcvtdq2ps %ymm3, %ymm3 vmovaps LCPI8_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm4, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI8_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vmovaps %ymm3, %ymm4 vmovaps LCPI8_36(%rip), %ymm3 ## ymm3 = [nan,nan,nan,nan,nan,nan,nan,nan] vblendvps %ymm0, %ymm3, %ymm4, %ymm0 vblendvps %ymm1, %ymm0, %ymm2, %ymm0 vmovaps LCPI8_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01] vmulps %ymm1, %ymm0, %ymm0 vmovaps LCPI8_37(%rip), %ymm1 ## ymm1 = 
[1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00] vmulps %ymm1, %ymm0, %ymm1 vroundps $9, %ymm1, %ymm1 vcvttps2dq %ymm1, %ymm2 vmovaps LCPI8_38(%rip), %ymm3 ## ymm3 = [6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01] vmulps %ymm3, %ymm1, %ymm3 vsubps %ymm3, %ymm0, %ymm0 vmovaps LCPI8_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06] vmulps %ymm3, %ymm1, %ymm1 vsubps %ymm1, %ymm0, %ymm0 vmovaps LCPI8_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04] vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_41(%rip), %ymm14 ## ymm14 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03] vaddps %ymm14, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_42(%rip), %ymm13 ## ymm13 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03] vaddps %ymm13, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_43(%rip), %ymm12 ## ymm12 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02] vaddps %ymm12, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_44(%rip), %ymm11 ## ymm11 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01] vaddps %ymm11, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_45(%rip), %ymm6 ## ymm6 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01] vaddps %ymm6, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vaddps %ymm8, %ymm0, %ymm0 vmovdqa LCPI8_46(%rip), %xmm7 ## xmm7 = [127,127,127,127] vpaddd %xmm7, %xmm2, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpaddd %xmm7, %xmm3, %xmm4 vpcmpgtd %xmm7, %xmm3, %xmm3 vpcmpgtd %xmm7, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vmovdqa LCPI8_47(%rip), %xmm10 ## xmm10 = [1,1,1,1] vpcmpgtd %xmm4, %xmm10, %xmm3 vpcmpgtd %xmm1, %xmm10, %xmm5 vinsertf128 $1, %xmm3, %ymm5, %ymm3 vpslld $23, %xmm1, %xmm1 vpslld $23, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm1, %ymm1 vmulps %ymm0, %ymm1, %ymm0 vmovaps LCPI8_48(%rip), %ymm1 ## ymm1 = [inf,inf,inf,inf,inf,inf,inf,inf] vblendvps %ymm2, %ymm1, %ymm0, %ymm0 vblendvps %ymm3, %ymm15, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vmovups -32(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm15, %ymm1, %ymm1 vminps %ymm8, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm15, %ymm1 vcmpnltps %ymm3, %ymm15, %ymm2 vblendvps %ymm2, %ymm8, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm4 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vpaddd %xmm9, %xmm5, %xmm5 vpaddd %xmm9, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovaps LCPI8_24(%rip), %ymm0 ## ymm0 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm0, %ymm3, %ymm3 vorps LCPI8_5(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm8, %ymm3 vmulps LCPI8_25(%rip), %ymm3, %ymm5 vaddps LCPI8_26(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_27(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_28(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_29(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_30(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_31(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps 
LCPI8_32(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps LCPI8_33(%rip), %ymm5, %ymm5 vmulps %ymm5, %ymm3, %ymm5 vaddps %ymm8, %ymm5, %ymm5 vsubps %ymm3, %ymm15, %ymm3 vmulps %ymm5, %ymm3, %ymm3 vcvtdq2ps %ymm4, %ymm4 vmulps LCPI8_34(%rip), %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI8_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm4, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps LCPI8_49(%rip), %ymm1, %ymm1 vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm3 vmulps LCPI8_38(%rip), %ymm2, %ymm4 vsubps %ymm4, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm1 vmulps LCPI8_40(%rip), %ymm1, %ymm2 vaddps %ymm14, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm13, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm12, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm11, %ymm2, %ymm2 vmovaps %ymm11, %ymm12 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm6, %ymm2, %ymm2 vmovaps %ymm6, %ymm11 vmulps %ymm2, %ymm1, %ymm2 vaddps %ymm8, %ymm2, %ymm2 vmulps %ymm2, %ymm1, %ymm1 vaddps %ymm8, %ymm1, %ymm1 vpaddd %xmm7, %xmm3, %xmm2 vextractf128 $1, %ymm3, %xmm4 vpaddd %xmm7, %xmm4, %xmm5 vpcmpgtd %xmm7, %xmm4, %xmm4 vpcmpgtd %xmm7, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 vpcmpgtd %xmm5, %xmm10, %xmm4 vpcmpgtd %xmm2, %xmm10, %xmm6 vinsertf128 $1, %xmm4, %ymm6, %ymm4 vpslld $23, %xmm2, %xmm2 vpslld $23, %xmm5, %xmm5 vinsertf128 $1, %xmm5, %ymm2, %ymm2 vmulps %ymm1, %ymm2, %ymm1 vblendvps %ymm3, LCPI8_48(%rip), %ymm1, %ymm1 vblendvps %ymm4, %ymm15, %ymm1, %ymm1 vmovups 64(%rsp), %ymm2 ## 32-byte Reload vmaxps %ymm15, %ymm2, %ymm2 vminps %ymm8, %ymm2, %ymm4 vcmpnleps %ymm4, %ymm15, %ymm2 vcmpnltps %ymm4, %ymm15, %ymm3 vblendvps %ymm3, %ymm8, %ymm4, %ymm4 vpsrad $23, %xmm4, %xmm5 vextractf128 $1, %ymm4, %xmm6 vpsrad $23, %xmm6, %xmm6 vpaddd %xmm9, %xmm6, %xmm6 vpaddd %xmm9, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vandps %ymm0, %ymm4, %ymm4 vorps LCPI8_5(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm8, %ymm4 vmulps LCPI8_25(%rip), %ymm4, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps %ymm8, %ymm6, %ymm6 vsubps %ymm4, %ymm15, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vcvtdq2ps %ymm5, %ymm5 vmulps LCPI8_34(%rip), %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vmovaps LCPI8_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm2, LCPI8_36(%rip), %ymm0, %ymm2 vblendvps %ymm3, %ymm2, %ymm4, %ymm2 vmulps LCPI8_49(%rip), %ymm2, %ymm2 vmulps LCPI8_37(%rip), %ymm2, %ymm3 vroundps $9, %ymm3, %ymm3 vcvttps2dq %ymm3, %ymm4 vmulps LCPI8_38(%rip), %ymm3, %ymm5 vsubps %ymm5, %ymm2, %ymm2 vmulps LCPI8_39(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm2, %ymm2 vmulps LCPI8_40(%rip), %ymm2, %ymm3 vaddps %ymm14, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm13, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_43(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm12, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm8, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm2 
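## NOTE: this for_exit293 epilogue appears to gamma-encode the three
## accumulated color channels: each is clamped to [0,1] (vmaxps/vminps
## against 0 and LCPI8_22 = 1.0), then raised to the power LCPI8_49 =
## 0.4545454 (~1/2.2) through the same log/exp ladders, one ladder per
## channel; the block that follows scales by LCPI8_50 = 255.0, truncates
## to int, and byte-packs for the three 8-lane vmovq stores.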
        vpaddd %xmm7, %xmm4, %xmm3
        vextractf128 $1, %ymm4, %xmm5
        vpaddd %xmm7, %xmm5, %xmm6
        vpcmpgtd %xmm7, %xmm5, %xmm5
        vpcmpgtd %xmm7, %xmm4, %xmm4
        vinsertf128 $1, %xmm5, %ymm4, %ymm4
        vpcmpgtd %xmm6, %xmm10, %xmm5
        vpcmpgtd %xmm3, %xmm10, %xmm7
        vinsertf128 $1, %xmm5, %ymm7, %ymm5
        vpslld $23, %xmm3, %xmm3
        vpslld $23, %xmm6, %xmm6
        vinsertf128 $1, %xmm6, %ymm3, %ymm3
        vaddps %ymm8, %ymm2, %ymm2
        vmulps %ymm2, %ymm3, %ymm2
        vblendvps %ymm4, LCPI8_48(%rip), %ymm2, %ymm2
        vblendvps %ymm5, %ymm15, %ymm2, %ymm2
        vmovaps LCPI8_50(%rip), %ymm3 ## ymm3 = [2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02,2.550000e+02]
        vmovaps %ymm3, %ymm4
        vmulps -64(%rsp), %ymm4, %ymm0 ## 32-byte Folded Reload
        vcvttps2dq %ymm0, %ymm0
        vextractf128 $1, %ymm0, %xmm3
        vmovdqa LCPI8_51(%rip), %xmm5 ## xmm5 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
        vpshufb %xmm5, %xmm3, %xmm3
        vpshufb %xmm5, %xmm0, %xmm0
        vpunpcklqdq %xmm3, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm3[0]
        leal (%r15,%r13), %eax
        cltq
        vmovdqa LCPI8_52(%rip), %xmm3 ## xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
        vpshufb %xmm3, %xmm0, %xmm0
        movq 2688(%rsp), %rcx
        vmovq %xmm0, (%rcx,%rax)
        vmulps %ymm4, %ymm1, %ymm0
        vcvttps2dq %ymm0, %ymm0
        vextractf128 $1, %ymm0, %xmm1
        vpshufb %xmm5, %xmm1, %xmm1
        vpshufb %xmm5, %xmm0, %xmm0
        vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0]
        vpshufb %xmm3, %xmm0, %xmm0
        vmovq %xmm0, (%r9,%rax)
        vmulps %ymm4, %ymm2, %ymm0
        vcvttps2dq %ymm0, %ymm0
        vextractf128 $1, %ymm0, %xmm1
        vpshufb %xmm5, %xmm1, %xmm1
        vpshufb %xmm5, %xmm0, %xmm0
        vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0]
        vpshufb %xmm3, %xmm0, %xmm0
        movq 2704(%rsp), %rcx
        vmovq %xmm0, (%rcx,%rax)
        addl $8, %r15d
        cmpl 60(%rsp), %r15d ## 4-byte Folded Reload
        vxorps %xmm8, %xmm8, %xmm8
        jl LBB8_66
        jmp LBB8_63
        .p2align 4, 0x90
LBB8_62: ## in Loop: Header=BB8_61 Depth=1
        movl %edi, %r15d
LBB8_63: ## %partial_inner_all_outer170
        ## in Loop: Header=BB8_61 Depth=1
        cmpl -104(%rsp), %r15d ## 4-byte Folded Reload
        jge LBB8_64
## BB#85: ## %partial_inner_only1646
        ## in Loop: Header=BB8_61 Depth=1
        vmovd %r15d, %xmm0
        vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0]
        vpaddd LCPI8_3(%rip), %xmm0, %xmm8
        vpaddd LCPI8_4(%rip), %xmm0, %xmm9
        vmovups 2080(%rsp), %ymm1 ## 32-byte Reload
        vextractf128 $1, %ymm1, %xmm0
        vpcmpgtd %xmm9, %xmm0, %xmm0
        vpcmpgtd %xmm8, %xmm1, %xmm1
        vinsertf128 $1, %xmm0, %ymm1, %ymm7
        movl -68(%rsp), %r10d ## 4-byte Reload
        movl %r10d, %ecx
        imull -112(%rsp), %ecx ## 4-byte Folded Reload
        vmovd %ecx, %xmm0
        vpshufd $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0]
        vpaddd %xmm1, %xmm9, %xmm0
        vpaddd %xmm1, %xmm8, %xmm1
        vpslld $2, %xmm0, %xmm5
        vpslld $2, %xmm1, %xmm2
        vpmovsxdq %xmm2, %xmm4
        vpshufd $78, %xmm2, %xmm2 ## xmm2 = xmm2[2,3,0,1]
        vpmovsxdq %xmm2, %xmm6
        vpmovsxdq %xmm5, %xmm2
        vmovq (%r11), %xmm3 ## xmm3 = mem[0],zero
        vpshufd $68, %xmm3, %xmm3 ## xmm3 = xmm3[0,1,0,1]
        vpaddq %xmm2, %xmm3, %xmm2
        vpaddq %xmm6, %xmm3, %xmm6
        vpaddq %xmm4, %xmm3, %xmm4
        vinsertf128 $1, %xmm6, %ymm4, %ymm4
        vmovups %ymm7, 608(%rsp) ## 32-byte Spill
        vmovmskps %ymm7, %ebp
        movq %rbp, %r14
        andq $1, %r14
        jne LBB8_87
## BB#86: ## in Loop: Header=BB8_61 Depth=1
        vmovups 640(%rsp), %ymm7 ## 32-byte Reload
        jmp LBB8_88
        .p2align 4, 0x90
LBB8_64: ## in Loop: Header=BB8_61 Depth=1
        movl -68(%rsp), %edx ## 4-byte Reload
        movl -108(%rsp), %edi ## 4-byte Reload
        vmovdqu 640(%rsp), %ymm3 ## 32-byte Reload
        vmovdqa 560(%rsp), %xmm6 ## 16-byte Reload
        vmovaps 544(%rsp), %xmm10 ## 16-byte Reload
        vmovups 480(%rsp), %ymm4 ## 32-byte Reload
        vmovups 448(%rsp), %ymm5 ## 32-byte Reload
        vmovups 416(%rsp), %ymm14 ## 32-byte Reload
        vmovaps 528(%rsp), %xmm7 ## 16-byte Reload
        jmp LBB8_70
        .p2align 4, 0x90
LBB8_87: ## %pl_dolane.i14005
        ## in Loop: Header=BB8_61 Depth=1
        vmovq %xmm4, %rax
        vmovss (%rax), %xmm6 ## xmm6 = mem[0],zero,zero,zero
        vmovups 640(%rsp), %ymm7 ## 32-byte Reload
        vblendps $1, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0],ymm7[1,2,3,4,5,6,7]
LBB8_88: ## %pl_loopend.i14008
        ## in Loop: Header=BB8_61 Depth=1
        vpshufd $78, %xmm5, %xmm5 ## xmm5 = xmm5[2,3,0,1]
        movq %rbp, %r12
        andq $2, %r12
        je LBB8_90
## BB#89: ## %pl_dolane.1.i14012
        ## in Loop: Header=BB8_61 Depth=1
        vpextrq $1, %xmm4, %rax
        vinsertps $16, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0],mem[0],xmm7[2,3]
        vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7]
LBB8_90: ## %pl_loopend.1.i14015
        ## in Loop: Header=BB8_61 Depth=1
        vmovaps LCPI8_9(%rip), %ymm10 ## ymm10 = [260046848,260046848,260046848,260046848,260046848,260046848,260046848,260046848]
        vmovaps LCPI8_14(%rip), %ymm13 ## ymm13 = [32768,32768,32768,32768,32768,32768,32768,32768]
        vmovdqa LCPI8_15(%rip), %xmm14 ## xmm14 = [260046848,260046848,260046848,260046848]
        vpmovsxdq %xmm5, %xmm5
        movq %rbp, %rdi
        andq $4, %rdi
        je LBB8_92
## BB#91: ## %pl_dolane.2.i14018
        ## in Loop: Header=BB8_61 Depth=1
        vextractf128 $1, %ymm4, %xmm6
        vmovq %xmm6, %rax
        vinsertps $32, (%rax), %xmm7, %xmm6 ## xmm6 = xmm7[0,1],mem[0],xmm7[3]
        vblendps $15, %ymm6, %ymm7, %ymm7 ## ymm7 = ymm6[0,1,2,3],ymm7[4,5,6,7]
LBB8_92: ## %pl_loopend.2.i14021
        ## in Loop: Header=BB8_61 Depth=1
        vxorps %xmm11, %xmm11, %xmm11
        vpaddq %xmm5, %xmm3, %xmm3
        movq %rbp, %rbx
        andq $8, %rbx
        movl %ecx, 1152(%rsp) ## 4-byte Spill
        je LBB8_94
## BB#93: ## %pl_dolane.3.i14024
        ## in Loop: Header=BB8_61 Depth=1
        vextractf128 $1, %ymm4, %xmm4
        vpextrq $1, %xmm4, %rax
        vinsertps $48, (%rax), %xmm7, %xmm4 ## xmm4 = xmm7[0,1,2],mem[0]
        vblendps $15, %ymm4, %ymm7, %ymm7 ## ymm7 = ymm4[0,1,2,3],ymm7[4,5,6,7]
LBB8_94: ## %pl_loopend.3.i14027
        ## in Loop: Header=BB8_61 Depth=1
        vinsertf128 $1, %xmm3, %ymm2, %ymm2
        movq %rbp, %rcx
        andq $16, %rcx
        je LBB8_96
## BB#95: ## %pl_dolane.4.i14030
        ## in Loop: Header=BB8_61 Depth=1
        vmovq %xmm2, %rax
        vextractf128 $1, %ymm7, %xmm3
        vmovss (%rax), %xmm4 ## xmm4 = mem[0],zero,zero,zero
        vblendps $1, %xmm4, %xmm3, %xmm3 ## xmm3 = xmm4[0],xmm3[1,2,3]
        vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB8_96: ## %pl_loopend.4.i14033
        ## in Loop: Header=BB8_61 Depth=1
        movq %rbp, %rdx
        andq $32, %rdx
        je LBB8_98
## BB#97: ## %pl_dolane.5.i14036
        ## in Loop: Header=BB8_61 Depth=1
        vpextrq $1, %xmm2, %rax
        vextractf128 $1, %ymm7, %xmm3
        vinsertps $16, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3]
        vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB8_98: ## %pl_loopend.5.i14039
        ## in Loop: Header=BB8_61 Depth=1
        movq %rbp, %rsi
        andq $64, %rsi
        vmovdqu %ymm8, -32(%rsp) ## 32-byte Spill
        vmovdqa %xmm9, -64(%rsp) ## 16-byte Spill
        je LBB8_100
## BB#99: ## %pl_dolane.6.i14042
        ## in Loop: Header=BB8_61 Depth=1
        vextractf128 $1, %ymm2, %xmm3
        vmovq %xmm3, %rax
        vextractf128 $1, %ymm7, %xmm3
        vinsertps $32, (%rax), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3]
        vinsertf128 $1, %xmm3, %ymm7, %ymm7
LBB8_100: ## %pl_loopend.6.i14044
        ## in Loop: Header=BB8_61 Depth=1
        vinsertf128 $1, %xmm0, %ymm1, %ymm9
        testb %bpl, %bpl
        jns LBB8_102
## BB#101: ## %pl_dolane.7.i14047
        ## in Loop: Header=BB8_61 Depth=1
        vextractf128 $1, %ymm2, %xmm0
        vpextrq $1, %xmm0, %rax
        vextractf128 $1, %ymm7, %xmm0
        vinsertps $48, (%rax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[0]
        vinsertf128 $1, %xmm0, %ymm7, %ymm7
LBB8_102: ## %__gather64_float.exit
        ## in Loop: Header=BB8_61 Depth=1
        vmovups %ymm7, 640(%rsp) ## 
32-byte Spill vmovaps 800(%rsp), %xmm0 ## 16-byte Reload vpermilps $0, %xmm0, %xmm1 ## xmm1 = xmm0[0,0,0,0] vpaddd %xmm9, %xmm9, %xmm3 vextractf128 $1, %ymm9, %xmm12 vpaddd %xmm12, %xmm12, %xmm4 vpmovsxdq %xmm4, %xmm0 vpmovsxdq %xmm3, %xmm2 vpshufd $78, %xmm3, %xmm3 ## xmm3 = xmm3[2,3,0,1] vpmovsxdq %xmm3, %xmm3 vmovq 8(%r11), %xmm5 ## xmm5 = mem[0],zero vpshufd $68, %xmm5, %xmm6 ## xmm6 = xmm5[0,1,0,1] vpaddq %xmm0, %xmm6, %xmm5 vpaddq %xmm3, %xmm6, %xmm7 vpaddq %xmm2, %xmm6, %xmm8 vinsertf128 $1, %xmm7, %ymm8, %ymm7 testq %r14, %r14 jne LBB8_104 ## BB#103: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 560(%rsp), %xmm8 ## 16-byte Reload jmp LBB8_105 .p2align 4, 0x90 LBB8_104: ## %pl_dolane.i13931 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm7, %rax vmovdqa 560(%rsp), %xmm8 ## 16-byte Reload vpinsrw $0, (%rax), %xmm8, %xmm8 LBB8_105: ## %pl_loopend.i13934 ## in Loop: Header=BB8_61 Depth=1 vpshufd $78, %xmm4, %xmm4 ## xmm4 = xmm4[2,3,0,1] testq %r12, %r12 je LBB8_107 ## BB#106: ## %pl_dolane.1.i13940 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm7, %rax vpinsrw $1, (%rax), %xmm8, %xmm8 LBB8_107: ## %pl_loopend.1.i13943 ## in Loop: Header=BB8_61 Depth=1 vpmovsxdq %xmm4, %xmm4 testq %rdi, %rdi vmovups %ymm1, 64(%rsp) ## 32-byte Spill je LBB8_109 ## BB#108: ## %pl_dolane.2.i13948 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm7, %xmm1 vmovq %xmm1, %rax vpinsrw $2, (%rax), %xmm8, %xmm8 LBB8_109: ## %pl_loopend.2.i13951 ## in Loop: Header=BB8_61 Depth=1 vpaddq %xmm4, %xmm6, %xmm6 testq %rbx, %rbx je LBB8_111 ## BB#110: ## %pl_dolane.3.i13956 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm7, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $3, (%rax), %xmm8, %xmm8 LBB8_111: ## %pl_loopend.3.i13959 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm6, %ymm5, %ymm5 testq %rcx, %rcx je LBB8_113 ## BB#112: ## %pl_dolane.4.i13964 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm5, %rax vpinsrw $4, (%rax), %xmm8, %xmm8 LBB8_113: ## %pl_loopend.4.i13967 ## in Loop: Header=BB8_61 Depth=1 testq %rdx, %rdx je LBB8_115 ## BB#114: ## %pl_dolane.5.i13972 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm5, %rax vpinsrw $5, (%rax), %xmm8, %xmm8 LBB8_115: ## %pl_loopend.5.i13975 ## in Loop: Header=BB8_61 Depth=1 testq %rsi, %rsi je LBB8_117 ## BB#116: ## %pl_dolane.6.i13980 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm8, %xmm8 LBB8_117: ## %pl_loopend.6.i13982 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm4, %ymm0, %ymm0 vinsertf128 $1, %xmm3, %ymm2, %ymm4 testb %bpl, %bpl jns LBB8_119 ## BB#118: ## %pl_dolane.7.i13987 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm8, %xmm8 LBB8_119: ## %__gather64_i16.exit13988 ## in Loop: Header=BB8_61 Depth=1 vmovdqa LCPI8_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vpunpckhwd %xmm11, %xmm8, %xmm1 ## xmm1 = xmm8[4],xmm11[4],xmm8[5],xmm11[5],xmm8[6],xmm11[6],xmm8[7],xmm11[7] vpmovzxwd %xmm8, %xmm2 ## xmm2 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps LCPI8_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 576(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups %ymm2, 352(%rsp) ## 32-byte Spill vandps %ymm10, %ymm2, %ymm2 vpaddd %xmm5, %xmm3, %xmm5 vmovdqu %ymm5, 256(%rsp) ## 32-byte Spill vpaddd LCPI8_12(%rip), %xmm3, %xmm3 vmovdqu %ymm3, 1088(%rsp) ## 32-byte Spill vpcmpeqd %xmm11, 
%xmm2, %xmm3 vmovdqu %ymm3, 1120(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm6 vpslld $16, %xmm6, %xmm1 vmovdqu %ymm1, 384(%rsp) ## 32-byte Spill vmovups %ymm2, 288(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm2, %xmm1 vmovdqu %ymm1, 320(%rsp) ## 32-byte Spill vmovq 16(%r11), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vextractf128 $1, %ymm4, %xmm7 vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm5 vpaddq %xmm0, %xmm3, %xmm2 testq %r14, %r14 je LBB8_121 ## BB#120: ## %pl_dolane.i13855 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm5, %rax vmovdqa 32(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 32(%rsp) ## 16-byte Spill LBB8_121: ## %pl_loopend.i13858 ## in Loop: Header=BB8_61 Depth=1 testq %r12, %r12 vmovups %ymm6, 192(%rsp) ## 32-byte Spill je LBB8_123 ## BB#122: ## %pl_dolane.1.i13864 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm5, %rax vmovdqa 32(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 32(%rsp) ## 16-byte Spill LBB8_123: ## %pl_loopend.1.i13867 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm0, %xmm6 testq %rdi, %rdi je LBB8_125 ## BB#124: ## %pl_dolane.2.i13872 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vmovdqa 32(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 32(%rsp) ## 16-byte Spill LBB8_125: ## %pl_loopend.2.i13875 ## in Loop: Header=BB8_61 Depth=1 vpaddq %xmm6, %xmm3, %xmm3 testq %rbx, %rbx je LBB8_127 ## BB#126: ## %pl_dolane.3.i13880 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa 32(%rsp), %xmm1 ## 16-byte Reload vpinsrw $3, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 32(%rsp) ## 16-byte Spill LBB8_127: ## %pl_loopend.3.i13883 ## in Loop: Header=BB8_61 Depth=1 vmovdqa LCPI8_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vinsertf128 $1, %xmm3, %ymm2, %ymm2 testq %rcx, %rcx jne LBB8_129 ## BB#128: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 32(%rsp), %xmm3 ## 16-byte Reload jmp LBB8_130 .p2align 4, 0x90 LBB8_129: ## %pl_dolane.4.i13888 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm2, %rax vmovdqa 32(%rsp), %xmm3 ## 16-byte Reload vpinsrw $4, (%rax), %xmm3, %xmm3 LBB8_130: ## %pl_loopend.4.i13891 ## in Loop: Header=BB8_61 Depth=1 testq %rdx, %rdx je LBB8_132 ## BB#131: ## %pl_dolane.5.i13896 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm2, %rax vpinsrw $5, (%rax), %xmm3, %xmm3 LBB8_132: ## %pl_loopend.5.i13899 ## in Loop: Header=BB8_61 Depth=1 testq %rsi, %rsi je LBB8_134 ## BB#133: ## %pl_dolane.6.i13904 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm3, %xmm3 LBB8_134: ## %pl_loopend.6.i13906 ## in Loop: Header=BB8_61 Depth=1 testb %bpl, %bpl jns LBB8_136 ## BB#135: ## %pl_dolane.7.i13911 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm3, %xmm3 LBB8_136: ## %__gather64_i16.exit13912 ## in Loop: Header=BB8_61 Depth=1 vpunpckhwd %xmm11, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm11[4],xmm3[5],xmm11[5],xmm3[6],xmm11[6],xmm3[7],xmm11[7] vmovdqa %xmm3, 32(%rsp) ## 16-byte Spill vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps LCPI8_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 736(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm2 
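## NOTE: the pl_dolane.N.* / pl_loopend.N.* chains in this stretch emulate
## a masked 64-bit gather lane by lane: each bit of the execution mask in
## %rbp gates a vpinsrw of that lane's 16-bit texel. The mask/shift work
## around each __gather64_i16.exit then widens the gathered half floats to
## float32 by hand: LCPI8_8 = 0x7fff drops the sign, vpslld $13 moves
## exponent+mantissa into fp32 position, adding LCPI8_10 = 0x38000000
## rebiases the exponent (15 -> 127), and the sign returns via vpslld $16
## of the LCPI8_14 = 0x8000 bit (denormal and inf/NaN lanes are patched by
## the LCPI8_12/LCPI8_13 and LCPI8_11 blends in the cif_mask_all prologue).
## A minimal C sketch of the normal-path conversion, assuming this reading
## (half_to_float is a hypothetical helper, not a symbol from this file):
##     float half_to_float(uint16_t h) {   // needs <stdint.h>, <string.h>
##         uint32_t bits = (((uint32_t)(h & 0x7fff) << 13) + 0x38000000)
##                       | ((uint32_t)(h & 0x8000) << 16);
##         float f;
##         memcpy(&f, &bits, sizeof f);    // bit-cast, strict-aliasing safe
##         return f;
##     }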
vmovups %ymm2, 768(%rsp) ## 32-byte Spill vandps %ymm10, %ymm2, %ymm2 vpaddd %xmm5, %xmm3, %xmm5 vmovdqu %ymm5, 960(%rsp) ## 32-byte Spill vpaddd LCPI8_12(%rip), %xmm3, %xmm3 vmovdqu %ymm3, 896(%rsp) ## 32-byte Spill vpcmpeqd %xmm11, %xmm2, %xmm3 vmovdqu %ymm3, 864(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm1 vmovups %ymm1, 1056(%rsp) ## 32-byte Spill vpslld $16, %xmm1, %xmm1 vmovdqu %ymm1, 1024(%rsp) ## 32-byte Spill vmovups %ymm2, 992(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm2, %xmm1 vmovdqu %ymm1, 928(%rsp) ## 32-byte Spill vmovq 24(%r11), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm5 vpaddq %xmm0, %xmm3, %xmm2 testq %r14, %r14 je LBB8_138 ## BB#137: ## %pl_dolane.i13779 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm5, %rax vmovdqa 16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 16(%rsp) ## 16-byte Spill LBB8_138: ## %pl_loopend.i13782 ## in Loop: Header=BB8_61 Depth=1 testq %r12, %r12 je LBB8_140 ## BB#139: ## %pl_dolane.1.i13788 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm5, %rax vmovdqa 16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 16(%rsp) ## 16-byte Spill LBB8_140: ## %pl_loopend.1.i13791 ## in Loop: Header=BB8_61 Depth=1 testq %rdi, %rdi je LBB8_142 ## BB#141: ## %pl_dolane.2.i13796 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vmovq %xmm1, %rax vmovdqa 16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 16(%rsp) ## 16-byte Spill LBB8_142: ## %pl_loopend.2.i13799 ## in Loop: Header=BB8_61 Depth=1 vpaddq %xmm6, %xmm3, %xmm3 testq %rbx, %rbx je LBB8_144 ## BB#143: ## %pl_dolane.3.i13804 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa 16(%rsp), %xmm1 ## 16-byte Reload vpinsrw $3, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 16(%rsp) ## 16-byte Spill LBB8_144: ## %pl_loopend.3.i13807 ## in Loop: Header=BB8_61 Depth=1 vmovdqa LCPI8_10(%rip), %xmm5 ## xmm5 = [939524096,939524096,939524096,939524096] vinsertf128 $1, %xmm3, %ymm2, %ymm2 testq %rcx, %rcx jne LBB8_146 ## BB#145: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 16(%rsp), %xmm3 ## 16-byte Reload jmp LBB8_147 .p2align 4, 0x90 LBB8_146: ## %pl_dolane.4.i13812 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm2, %rax vmovdqa 16(%rsp), %xmm3 ## 16-byte Reload vpinsrw $4, (%rax), %xmm3, %xmm3 LBB8_147: ## %pl_loopend.4.i13815 ## in Loop: Header=BB8_61 Depth=1 testq %rdx, %rdx je LBB8_149 ## BB#148: ## %pl_dolane.5.i13820 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm2, %rax vpinsrw $5, (%rax), %xmm3, %xmm3 LBB8_149: ## %pl_loopend.5.i13823 ## in Loop: Header=BB8_61 Depth=1 testq %rsi, %rsi je LBB8_151 ## BB#150: ## %pl_dolane.6.i13828 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm3, %xmm3 LBB8_151: ## %pl_loopend.6.i13830 ## in Loop: Header=BB8_61 Depth=1 testb %bpl, %bpl jns LBB8_153 ## BB#152: ## %pl_dolane.7.i13835 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm2, %xmm1 vpextrq $1, %xmm1, %rax vpinsrw $7, (%rax), %xmm3, %xmm3 LBB8_153: ## %__gather64_i16.exit13836 ## in Loop: Header=BB8_61 Depth=1 vpunpckhwd %xmm11, %xmm3, %xmm1 ## xmm1 = xmm3[4],xmm11[4],xmm3[5],xmm11[5],xmm3[6],xmm11[6],xmm3[7],xmm11[7] vmovdqa %xmm3, 16(%rsp) ## 16-byte Spill vpmovzxwd %xmm3, %xmm2 ## xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero vinsertf128 $1, %xmm1, %ymm2, %ymm1 vandps 
LCPI8_8(%rip), %ymm1, %ymm2 vpslld $13, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm2 vpslld $13, %xmm2, %xmm2 vmovdqa %xmm2, 1328(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups %ymm2, 2560(%rsp) ## 32-byte Spill vandps %ymm10, %ymm2, %ymm15 vpaddd %xmm5, %xmm3, %xmm2 vmovdqu %ymm2, 2496(%rsp) ## 32-byte Spill vpaddd LCPI8_12(%rip), %xmm3, %xmm2 vmovdqu %ymm2, 2400(%rsp) ## 32-byte Spill vpcmpeqd %xmm11, %xmm15, %xmm2 vmovdqu %ymm2, 2432(%rsp) ## 32-byte Spill vandps %ymm13, %ymm1, %ymm1 vmovups %ymm1, 832(%rsp) ## 32-byte Spill vpslld $16, %xmm1, %xmm1 vmovdqu %ymm1, 800(%rsp) ## 32-byte Spill vmovq 32(%r11), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm3 ## xmm3 = xmm1[0,1,0,1] vpaddq %xmm7, %xmm3, %xmm1 vpaddq %xmm4, %xmm3, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm4 vmovups %ymm15, 2528(%rsp) ## 32-byte Spill vpcmpeqd %xmm14, %xmm15, %xmm1 vpaddq %xmm0, %xmm3, %xmm0 testq %r14, %r14 vmovdqu %ymm1, 2464(%rsp) ## 32-byte Spill je LBB8_155 ## BB#154: ## %pl_dolane.i13704 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm4, %rax vmovdqa 112(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 112(%rsp) ## 16-byte Spill LBB8_155: ## %pl_loopend.i13707 ## in Loop: Header=BB8_61 Depth=1 testq %r12, %r12 je LBB8_157 ## BB#156: ## %pl_dolane.1.i13713 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm4, %rax vmovdqa 112(%rsp), %xmm1 ## 16-byte Reload vpinsrw $1, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 112(%rsp) ## 16-byte Spill LBB8_157: ## %pl_loopend.1.i13716 ## in Loop: Header=BB8_61 Depth=1 testq %rdi, %rdi je LBB8_159 ## BB#158: ## %pl_dolane.2.i13721 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm4, %xmm1 vmovq %xmm1, %rax vmovdqa 112(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, (%rax), %xmm1, %xmm1 vmovdqa %xmm1, 112(%rsp) ## 16-byte Spill LBB8_159: ## %pl_loopend.2.i13724 ## in Loop: Header=BB8_61 Depth=1 vpaddq %xmm6, %xmm3, %xmm2 testq %rbx, %rbx jne LBB8_161 ## BB#160: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 112(%rsp), %xmm6 ## 16-byte Reload jmp LBB8_162 .p2align 4, 0x90 LBB8_161: ## %pl_dolane.3.i13729 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm4, %xmm1 vpextrq $1, %xmm1, %rax vmovdqa 112(%rsp), %xmm6 ## 16-byte Reload vpinsrw $3, (%rax), %xmm6, %xmm6 LBB8_162: ## %pl_loopend.3.i13732 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm2, %ymm0, %ymm0 testq %rcx, %rcx je LBB8_164 ## BB#163: ## %pl_dolane.4.i13737 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm0, %rax vpinsrw $4, (%rax), %xmm6, %xmm6 LBB8_164: ## %pl_loopend.4.i13740 ## in Loop: Header=BB8_61 Depth=1 testq %rdx, %rdx je LBB8_166 ## BB#165: ## %pl_dolane.5.i13745 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm0, %rax vpinsrw $5, (%rax), %xmm6, %xmm6 LBB8_166: ## %pl_loopend.5.i13748 ## in Loop: Header=BB8_61 Depth=1 testq %rsi, %rsi je LBB8_168 ## BB#167: ## %pl_dolane.6.i13753 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm0, %xmm1 vmovq %xmm1, %rax vpinsrw $6, (%rax), %xmm6, %xmm6 LBB8_168: ## %pl_loopend.6.i13755 ## in Loop: Header=BB8_61 Depth=1 testb %bpl, %bpl vmovdqa %xmm8, 560(%rsp) ## 16-byte Spill jns LBB8_170 ## BB#169: ## %pl_dolane.7.i13760 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm0, %xmm0 vpextrq $1, %xmm0, %rax vpinsrw $7, (%rax), %xmm6, %xmm6 LBB8_170: ## %__gather64_i16.exit ## in Loop: Header=BB8_61 Depth=1 vpunpckhwd %xmm11, %xmm6, %xmm0 ## xmm0 = xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7] vpmovzxwd %xmm6, %xmm1 ## xmm1 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero vinsertf128 $1, 
%xmm0, %ymm1, %ymm0 vandps LCPI8_8(%rip), %ymm0, %ymm1 vpslld $13, %xmm1, %xmm2 vextractf128 $1, %ymm1, %xmm1 vpslld $13, %xmm1, %xmm1 vmovdqa %xmm1, 1312(%rsp) ## 16-byte Spill vinsertf128 $1, %xmm1, %ymm2, %ymm15 vandps %ymm10, %ymm15, %ymm8 vpaddd %xmm5, %xmm2, %xmm1 vmovdqu %ymm1, 2240(%rsp) ## 32-byte Spill vpaddd LCPI8_12(%rip), %xmm2, %xmm1 vmovdqu %ymm1, 2208(%rsp) ## 32-byte Spill vpcmpeqd %xmm11, %xmm8, %xmm1 vmovdqu %ymm1, 2176(%rsp) ## 32-byte Spill vandps %ymm13, %ymm0, %ymm0 vpslld $16, %xmm0, %xmm10 vpcmpeqd %xmm14, %xmm8, %xmm13 vpmovsxdq %xmm12, %xmm11 vpmovsxdq %xmm9, %xmm4 vpshufd $78, %xmm9, %xmm1 ## xmm1 = xmm9[2,3,0,1] vpmovsxdq %xmm1, %xmm5 vmovq 40(%r11), %xmm1 ## xmm1 = mem[0],zero vpshufd $68, %xmm1, %xmm7 ## xmm7 = xmm1[0,1,0,1] vpaddq %xmm5, %xmm7, %xmm1 vpaddq %xmm4, %xmm7, %xmm2 vinsertf128 $1, %xmm1, %ymm2, %ymm9 vpaddq %xmm11, %xmm7, %xmm3 testq %r14, %r14 vmovdqu %ymm10, 2304(%rsp) ## 32-byte Spill jne LBB8_172 ## BB#171: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 544(%rsp), %xmm10 ## 16-byte Reload jmp LBB8_173 .p2align 4, 0x90 LBB8_172: ## %pl_dolane.i13628 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm9, %rax movzbl (%rax), %eax vmovdqa 544(%rsp), %xmm10 ## 16-byte Reload vpinsrw $0, %eax, %xmm10, %xmm10 LBB8_173: ## %pl_loopend.i13631 ## in Loop: Header=BB8_61 Depth=1 vmovups 416(%rsp), %ymm14 ## 32-byte Reload vpshufd $78, %xmm12, %xmm1 ## xmm1 = xmm12[2,3,0,1] testq %r12, %r12 je LBB8_175 ## BB#174: ## %pl_dolane.1.i13637 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm9, %rax movzbl (%rax), %eax vpinsrw $1, %eax, %xmm10, %xmm10 LBB8_175: ## %pl_loopend.1.i13640 ## in Loop: Header=BB8_61 Depth=1 vpmovsxdq %xmm1, %xmm1 testq %rdi, %rdi je LBB8_177 ## BB#176: ## %pl_dolane.2.i13645 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm9, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $2, %eax, %xmm10, %xmm10 LBB8_177: ## %pl_loopend.2.i13648 ## in Loop: Header=BB8_61 Depth=1 vpaddq %xmm1, %xmm7, %xmm2 testq %rbx, %rbx vmovups %ymm0, 2336(%rsp) ## 32-byte Spill je LBB8_179 ## BB#178: ## %pl_dolane.3.i13653 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm9, %xmm7 vpextrq $1, %xmm7, %rax movzbl (%rax), %eax vpinsrw $3, %eax, %xmm10, %xmm10 LBB8_179: ## %pl_loopend.3.i13656 ## in Loop: Header=BB8_61 Depth=1 vmovups -32(%rsp), %ymm7 ## 32-byte Reload vmovaps -64(%rsp), %xmm0 ## 16-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm3 testq %rcx, %rcx je LBB8_181 ## BB#180: ## %pl_dolane.4.i13661 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm3, %rax movzbl (%rax), %eax vpinsrw $4, %eax, %xmm10, %xmm10 LBB8_181: ## %pl_loopend.4.i13664 ## in Loop: Header=BB8_61 Depth=1 testq %rdx, %rdx je LBB8_183 ## BB#182: ## %pl_dolane.5.i13669 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $5, %eax, %xmm10, %xmm10 LBB8_183: ## %pl_loopend.5.i13672 ## in Loop: Header=BB8_61 Depth=1 testq %rsi, %rsi je LBB8_185 ## BB#184: ## %pl_dolane.6.i13677 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm3, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $6, %eax, %xmm10, %xmm10 LBB8_185: ## %pl_loopend.6.i13679 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm0, %ymm7, %ymm7 vinsertf128 $1, %xmm1, %ymm11, %ymm0 vinsertf128 $1, %xmm5, %ymm4, %ymm4 testb %bpl, %bpl vmovups %ymm15, 2368(%rsp) ## 32-byte Spill jns LBB8_187 ## BB#186: ## %pl_dolane.7.i13684 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm3, %xmm1 vpextrq $1, %xmm1, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm10, %xmm10 LBB8_187: ## %__gather64_i8.exit13685 
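## NOTE: the per-lane vpinsrw pattern above appears to be reused for 8-bit
## data via movzbl loads; the vpand with LCPI8_53 just below then masks the
## gathered words back down to their low bytes before widening.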
## in Loop: Header=BB8_61 Depth=1 vcvtdq2ps %ymm7, %ymm9 vpand LCPI8_53(%rip), %xmm10, %xmm1 vpmovzxwd %xmm1, %xmm5 ## xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero vmovq 48(%r11), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm3 ## xmm3 = xmm2[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm7 vextractf128 $1, %ymm4, %xmm11 vpaddq %xmm11, %xmm3, %xmm2 vpaddq %xmm4, %xmm3, %xmm12 vinsertf128 $1, %xmm2, %ymm12, %ymm15 testq %r14, %r14 vmovdqa %xmm1, -32(%rsp) ## 16-byte Spill je LBB8_189 ## BB#188: ## %pl_dolane.i13560 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm15, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $0, %eax, %xmm1, %xmm1 vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill LBB8_189: ## %pl_loopend.i13563 ## in Loop: Header=BB8_61 Depth=1 vmovups 640(%rsp), %ymm1 ## 32-byte Reload vaddps LCPI8_5(%rip), %ymm9, %ymm2 testq %r12, %r12 vmovdqu %ymm13, 2272(%rsp) ## 32-byte Spill vmovdqu %ymm5, -64(%rsp) ## 32-byte Spill je LBB8_191 ## BB#190: ## %pl_dolane.1.i13569 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm15, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm5 ## 16-byte Reload vpinsrw $1, %eax, %xmm5, %xmm5 vmovdqa %xmm5, -96(%rsp) ## 16-byte Spill LBB8_191: ## %pl_loopend.1.i13572 ## in Loop: Header=BB8_61 Depth=1 vmulps 1920(%rsp), %ymm2, %ymm9 ## 32-byte Folded Reload vsubps 1856(%rsp), %ymm1, %ymm13 ## 32-byte Folded Reload vextractf128 $1, %ymm0, %xmm12 testq %rdi, %rdi je LBB8_193 ## BB#192: ## %pl_dolane.2.i13577 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm15, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $2, %eax, %xmm1, %xmm1 vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill LBB8_193: ## %pl_loopend.2.i13580 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_6(%rip), %ymm9, %ymm9 vmovups 1888(%rsp), %ymm1 ## 32-byte Reload vdivps %ymm13, %ymm1, %ymm1 vmovups %ymm1, 128(%rsp) ## 32-byte Spill vmovups 64(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm1, %ymm13 vpaddq %xmm12, %xmm3, %xmm3 testq %rbx, %rbx je LBB8_195 ## BB#194: ## %pl_dolane.3.i13585 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm15, %xmm2 vpextrq $1, %xmm2, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $3, %eax, %xmm1, %xmm1 vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill LBB8_195: ## %pl_loopend.3.i13588 ## in Loop: Header=BB8_61 Depth=1 vmovups 128(%rsp), %ymm1 ## 32-byte Reload vmulps %ymm1, %ymm9, %ymm9 vmulps %ymm1, %ymm13, %ymm13 vinsertf128 $1, %xmm3, %ymm7, %ymm3 testq %rcx, %rcx je LBB8_197 ## BB#196: ## %pl_dolane.4.i13593 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm3, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $4, %eax, %xmm1, %xmm1 vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill LBB8_197: ## %pl_loopend.4.i13596 ## in Loop: Header=BB8_61 Depth=1 vdivps 1824(%rsp), %ymm9, %ymm1 ## 32-byte Folded Reload vmovups %ymm1, 160(%rsp) ## 32-byte Spill vdivps 1792(%rsp), %ymm13, %ymm1 ## 32-byte Folded Reload vmovups %ymm1, 64(%rsp) ## 32-byte Spill testq %rdx, %rdx je LBB8_199 ## BB#198: ## %pl_dolane.5.i13601 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $5, %eax, %xmm1, %xmm1 vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill LBB8_199: ## %pl_loopend.5.i13604 ## in Loop: Header=BB8_61 Depth=1 vmovups 448(%rsp), %ymm13 ## 32-byte Reload vmovups 160(%rsp), %ymm1 ## 32-byte Reload vmulps %ymm1, %ymm1, %ymm7 vmovups 64(%rsp), %ymm1 ## 32-byte Reload vmulps %ymm1, %ymm1, %ymm2 testq %rsi, 
%rsi vmovdqa %xmm6, 112(%rsp) ## 16-byte Spill jne LBB8_201 ## BB#200: ## in Loop: Header=BB8_61 Depth=1 vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload jmp LBB8_202 .p2align 4, 0x90 LBB8_201: ## %pl_dolane.6.i13609 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm3, %xmm6 vmovq %xmm6, %rax movzbl (%rax), %eax vmovdqa -96(%rsp), %xmm1 ## 16-byte Reload vpinsrw $6, %eax, %xmm1, %xmm1 LBB8_202: ## %pl_loopend.6.i13611 ## in Loop: Header=BB8_61 Depth=1 vaddps %ymm2, %ymm7, %ymm7 vmovups 128(%rsp), %ymm2 ## 32-byte Reload vmulps %ymm2, %ymm2, %ymm2 testb %bpl, %bpl jns LBB8_204 ## BB#203: ## %pl_dolane.7.i13616 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm3, %xmm3 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm1, %xmm1 LBB8_204: ## %__gather64_i8.exit13617 ## in Loop: Header=BB8_61 Depth=1 vaddps %ymm7, %ymm2, %ymm9 vpand LCPI8_53(%rip), %xmm1, %xmm15 vpmovzxwd %xmm15, %xmm6 ## xmm6 = xmm15[0],zero,xmm15[1],zero,xmm15[2],zero,xmm15[3],zero vmovq 56(%r11), %xmm2 ## xmm2 = mem[0],zero vpshufd $68, %xmm2, %xmm3 ## xmm3 = xmm2[0,1,0,1] vpaddq %xmm0, %xmm3, %xmm0 vpaddq %xmm11, %xmm3, %xmm2 vpaddq %xmm4, %xmm3, %xmm4 vinsertf128 $1, %xmm2, %ymm4, %ymm5 testq %r14, %r14 jne LBB8_206 ## BB#205: ## in Loop: Header=BB8_61 Depth=1 vmovdqa 528(%rsp), %xmm7 ## 16-byte Reload jmp LBB8_207 .p2align 4, 0x90 LBB8_206: ## %pl_dolane.i13515 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm5, %rax movzbl (%rax), %eax vmovdqa 528(%rsp), %xmm7 ## 16-byte Reload vpinsrw $0, %eax, %xmm7, %xmm7 LBB8_207: ## %pl_loopend.i13518 ## in Loop: Header=BB8_61 Depth=1 vmovups 128(%rsp), %ymm11 ## 32-byte Reload vrsqrtps %ymm9, %ymm4 testq %r12, %r12 je LBB8_209 ## BB#208: ## %pl_dolane.1.i13520 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm5, %rax movzbl (%rax), %eax vpinsrw $1, %eax, %xmm7, %xmm7 LBB8_209: ## %pl_loopend.1.i13523 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm9, %ymm4, %ymm2 testq %rdi, %rdi vmovdqa %xmm1, -96(%rsp) ## 16-byte Spill vmovdqu %ymm6, 2144(%rsp) ## 32-byte Spill je LBB8_211 ## BB#210: ## %pl_dolane.2.i13525 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm6 vmovq %xmm6, %rax movzbl (%rax), %eax vpinsrw $2, %eax, %xmm7, %xmm7 LBB8_211: ## %pl_loopend.2.i13528 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm2, %ymm4, %ymm9 vpaddq %xmm12, %xmm3, %xmm2 testq %rbx, %rbx je LBB8_213 ## BB#212: ## %pl_dolane.3.i13530 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm5, %xmm3 vpextrq $1, %xmm3, %rax movzbl (%rax), %eax vpinsrw $3, %eax, %xmm7, %xmm7 LBB8_213: ## %pl_loopend.3.i13533 ## in Loop: Header=BB8_61 Depth=1 vmovups 480(%rsp), %ymm6 ## 32-byte Reload vmovaps LCPI8_7(%rip), %ymm1 ## ymm1 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm9, %ymm1, %ymm3 vinsertf128 $1, %xmm2, %ymm0, %ymm0 testq %rcx, %rcx je LBB8_215 ## BB#214: ## %pl_dolane.4.i13535 ## in Loop: Header=BB8_61 Depth=1 vmovq %xmm0, %rax movzbl (%rax), %eax vpinsrw $4, %eax, %xmm7, %xmm7 LBB8_215: ## %pl_loopend.4.i13538 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm3, %ymm4, %ymm2 testq %rdx, %rdx je LBB8_217 ## BB#216: ## %pl_dolane.5.i13540 ## in Loop: Header=BB8_61 Depth=1 vpextrq $1, %xmm0, %rax movzbl (%rax), %eax vpinsrw $5, %eax, %xmm7, %xmm7 LBB8_217: ## %pl_loopend.5.i13543 ## in Loop: Header=BB8_61 Depth=1 vmulps LCPI8_5(%rip), %ymm2, %ymm3 testq %rsi, %rsi je LBB8_219 ## BB#218: ## %pl_dolane.6.i13545 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm0, %xmm2 vmovq %xmm2, %rax movzbl (%rax), %eax vpinsrw $6, 
%eax, %xmm7, %xmm7 LBB8_219: ## %pl_loopend.6.i13547 ## in Loop: Header=BB8_61 Depth=1 vmulps 160(%rsp), %ymm3, %ymm4 ## 32-byte Folded Reload vmulps 64(%rsp), %ymm3, %ymm5 ## 32-byte Folded Reload vmulps %ymm3, %ymm11, %ymm3 testb %bpl, %bpl vmovdqa %xmm10, 544(%rsp) ## 16-byte Spill jns LBB8_221 ## BB#220: ## %pl_dolane.7.i13549 ## in Loop: Header=BB8_61 Depth=1 vextractf128 $1, %ymm0, %xmm0 vpextrq $1, %xmm0, %rax movzbl (%rax), %eax vpinsrw $7, %eax, %xmm7, %xmm7 LBB8_221: ## %__gather64_i8.exit ## in Loop: Header=BB8_61 Depth=1 vmovups 608(%rsp), %ymm0 ## 32-byte Reload vblendvps %ymm0, %ymm4, %ymm14, %ymm14 vmovups %ymm14, 416(%rsp) ## 32-byte Spill vblendvps %ymm0, %ymm5, %ymm13, %ymm13 vmovups %ymm13, 448(%rsp) ## 32-byte Spill vblendvps %ymm0, %ymm3, %ymm6, %ymm6 vmovups %ymm6, 480(%rsp) ## 32-byte Spill vpand LCPI8_53(%rip), %xmm7, %xmm10 vpmovzxwd %xmm10, %xmm0 ## xmm0 = xmm10[0],zero,xmm10[1],zero,xmm10[2],zero,xmm10[3],zero testl %r8d, %r8d vmovdqa %xmm7, 528(%rsp) ## 16-byte Spill movq %r12, 672(%rsp) ## 8-byte Spill movq %rdi, 704(%rsp) ## 8-byte Spill movq %rbx, 1280(%rsp) ## 8-byte Spill movq %rcx, 1248(%rsp) ## 8-byte Spill movq %rdx, 1216(%rsp) ## 8-byte Spill movq %rsi, 1184(%rsp) ## 8-byte Spill jle LBB8_222 ## BB#270: ## %for_loop1815.lr.ph ## in Loop: Header=BB8_61 Depth=1 movq %r14, %rbx vmovdqu %ymm0, 2112(%rsp) ## 32-byte Spill vmovdqa LCPI8_12(%rip), %xmm0 ## xmm0 = [947912704,947912704,947912704,947912704] vmovdqa %xmm0, %xmm5 vmovdqa 576(%rsp), %xmm0 ## 16-byte Reload vpaddd %xmm5, %xmm0, %xmm2 vmovups 1088(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm1, %ymm2 vmovdqa LCPI8_10(%rip), %xmm4 ## xmm4 = [939524096,939524096,939524096,939524096] vpaddd %xmm4, %xmm0, %xmm3 vmovups 256(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm0, %ymm3 vmovups 288(%rsp), %ymm0 ## 32-byte Reload vextractf128 $1, %ymm0, %xmm11 vxorps %xmm0, %xmm0, %xmm0 vpcmpeqd %xmm0, %xmm11, %xmm6 vmovups 1120(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vmovaps LCPI8_13(%rip), %ymm14 ## ymm14 = [-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05,-6.103516e-05] vaddps %ymm14, %ymm2, %ymm2 vblendvps %ymm6, %ymm2, %ymm3, %ymm2 vmovdqa 736(%rsp), %xmm1 ## 16-byte Reload vpaddd %xmm5, %xmm1, %xmm3 vmovups 896(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 vpaddd %xmm4, %xmm1, %xmm6 vmovups 960(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vmovups 992(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm1 vpcmpeqd %xmm0, %xmm1, %xmm7 vmovups 864(%rsp), %ymm12 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm12, %ymm7 vaddps %ymm14, %ymm3, %ymm3 vblendvps %ymm7, %ymm3, %ymm6, %ymm6 vpcmpeqd LCPI8_15(%rip), %xmm11, %xmm3 vmovups 320(%rsp), %ymm7 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm7, %ymm3 vmovdqa %xmm15, %xmm9 vmovdqa %xmm5, %xmm15 vmovaps LCPI8_11(%rip), %ymm11 ## ymm11 = [1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192,1879048192] vorps 352(%rsp), %ymm11, %ymm5 ## 32-byte Folded Reload vblendvps %ymm3, %ymm5, %ymm2, %ymm12 vpcmpeqd LCPI8_15(%rip), %xmm1, %xmm1 vmovups 928(%rsp), %ymm2 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm2, %ymm1 vorps 768(%rsp), %ymm11, %ymm2 ## 32-byte Folded Reload vblendvps %ymm1, %ymm2, %ymm6, %ymm13 vmovdqa 1328(%rsp), %xmm2 ## 16-byte Reload vpaddd %xmm15, %xmm2, %xmm1 vmovups 2400(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm3, %ymm1 vpaddd %xmm4, %xmm2, %xmm2 vmovups 2496(%rsp), 
%ymm3 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm3, %ymm2 vmovups 2528(%rsp), %ymm3 ## 32-byte Reload vextractf128 $1, %ymm3, %xmm6 vpcmpeqd %xmm0, %xmm6, %xmm7 vmovups 2432(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm3, %ymm7 vaddps %ymm14, %ymm1, %ymm1 vblendvps %ymm7, %ymm1, %ymm2, %ymm1 vmovdqa 1312(%rsp), %xmm3 ## 16-byte Reload vpaddd %xmm15, %xmm3, %xmm2 vmovups 2208(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm5, %ymm2 vpaddd %xmm4, %xmm3, %xmm7 vmovups 2240(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm3, %ymm7 vextractf128 $1, %ymm8, %xmm4 vpcmpeqd %xmm0, %xmm4, %xmm3 vmovups 2176(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm5, %ymm3 vaddps %ymm14, %ymm2, %ymm2 vmovdqa LCPI8_15(%rip), %xmm5 ## xmm5 = [260046848,260046848,260046848,260046848] vmovdqa %xmm5, %xmm14 vblendvps %ymm3, %ymm2, %ymm7, %ymm2 vpcmpeqd %xmm14, %xmm6, %xmm3 vmovups 2464(%rsp), %ymm5 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm5, %ymm3 vorps 2560(%rsp), %ymm11, %ymm6 ## 32-byte Folded Reload vblendvps %ymm3, %ymm6, %ymm1, %ymm8 vmovups 192(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm3 vpslld $16, %xmm3, %xmm3 vmovups 384(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm1, %ymm3 vmovups 1056(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm6 vpslld $16, %xmm6, %xmm6 vmovups 1024(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vorps %ymm3, %ymm12, %ymm3 vorps %ymm6, %ymm13, %ymm5 vpcmpeqd %xmm14, %xmm4, %xmm4 vmovups 2272(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm4, %ymm1, %ymm4 vorps 2368(%rsp), %ymm11, %ymm6 ## 32-byte Folded Reload vblendvps %ymm4, %ymm6, %ymm2, %ymm2 vmulps %ymm3, %ymm3, %ymm4 vmulps %ymm5, %ymm5, %ymm6 vsubps %ymm4, %ymm3, %ymm4 vsubps %ymm6, %ymm5, %ymm6 vaddps %ymm6, %ymm4, %ymm4 vmovdqa -32(%rsp), %xmm1 ## 16-byte Reload vpunpckhwd %xmm0, %xmm1, %xmm6 ## xmm6 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] vmovups -64(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm6, %ymm1, %ymm6 vpunpckhwd %xmm0, %xmm9, %xmm7 ## xmm7 = xmm9[4],xmm0[4],xmm9[5],xmm0[5],xmm9[6],xmm0[6],xmm9[7],xmm0[7] vxorps %xmm12, %xmm12, %xmm12 vmovups 2144(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm7, %ymm0, %ymm7 vmovups 832(%rsp), %ymm0 ## 32-byte Reload vextractf128 $1, %ymm0, %xmm0 vpslld $16, %xmm0, %xmm0 vmovups 800(%rsp), %ymm1 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm1, %ymm0 vmovups 2336(%rsp), %ymm1 ## 32-byte Reload vextractf128 $1, %ymm1, %xmm1 vpslld $16, %xmm1, %xmm1 vmovups 2304(%rsp), %ymm11 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm11, %ymm1 vmovaps LCPI8_16(%rip), %ymm11 ## ymm11 = [4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00,4.000000e+00] vmovaps %ymm11, %ymm13 vmulps %ymm13, %ymm4, %ymm11 vaddps LCPI8_6(%rip), %ymm11, %ymm11 vmulps %ymm13, %ymm3, %ymm3 vmulps %ymm13, %ymm5, %ymm5 vsqrtps %ymm11, %ymm11 vmovaps LCPI8_17(%rip), %ymm13 ## ymm13 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00] vaddps %ymm13, %ymm3, %ymm3 vaddps %ymm13, %ymm5, %ymm5 vmulps %ymm11, %ymm3, %ymm3 vmovups %ymm3, 192(%rsp) ## 32-byte Spill vmulps %ymm5, %ymm11, %ymm14 vmovups 128(%rsp), %ymm11 ## 32-byte Reload vmulps LCPI8_18(%rip), %ymm4, %ymm3 vcvtdq2ps %ymm6, %ymm4 vcvtdq2ps %ymm7, %ymm5 vmovaps LCPI8_7(%rip), %ymm6 ## ymm6 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm3, %ymm6, %ymm3 
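## NOTE: the 4.0 / -2.0 constants and the vsqrtps above have the shape of a
## spheremap-style decode of two stored normal components into a full unit
## normal; the 3.921569e-03 (= 1/255) multiplies below presumably normalize
## the gathered 8-bit channels to floats. Both readings are inferred from the
## constants, not documented.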
vmovups %ymm3, 384(%rsp) ## 32-byte Spill vorps %ymm0, %ymm8, %ymm0 vorps %ymm1, %ymm2, %ymm13 vmovaps LCPI8_19(%rip), %ymm1 ## ymm1 = [3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03,3.921569e-03] vmovaps %ymm1, %ymm2 vmulps %ymm2, %ymm4, %ymm1 vmovups %ymm1, 352(%rsp) ## 32-byte Spill vmulps %ymm2, %ymm5, %ymm1 vmovups %ymm1, 320(%rsp) ## 32-byte Spill vpunpckhwd %xmm12, %xmm10, %xmm1 ## xmm1 = xmm10[4],xmm12[4],xmm10[5],xmm12[5],xmm10[6],xmm12[6],xmm10[7],xmm12[7] vmovups 2112(%rsp), %ymm3 ## 32-byte Reload vinsertf128 $1, %xmm1, %ymm3, %ymm1 vcvtdq2ps %ymm1, %ymm1 vmulps %ymm2, %ymm1, %ymm1 vmovups %ymm1, 288(%rsp) ## 32-byte Spill movq 64(%r11), %r14 movq 72(%r11), %rcx movq 80(%r11), %rsi movl %r8d, %r12d movq 120(%r11), %r8 vaddps LCPI8_20(%rip), %ymm13, %ymm1 vmulps LCPI8_21(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vmovups %ymm0, 256(%rsp) ## 32-byte Spill vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill movq 2664(%rsp), %r13 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vxorps %ymm15, %ymm15, %ymm15 vmovups 160(%rsp), %ymm10 ## 32-byte Reload .p2align 4, 0x90 LBB8_271: ## %for_loop1815 ## Parent Loop BB8_61 Depth=1 ## => This Inner Loop Header: Depth=2 movslq (%r13), %r10 vmovss (%r8,%r10,4), %xmm1 ## xmm1 = mem[0],zero,zero,zero vbroadcastss (%r14,%r10,4), %ymm0 vsubps %ymm10, %ymm0, %ymm3 vbroadcastss (%rcx,%r10,4), %ymm0 vsubps 64(%rsp), %ymm0, %ymm2 ## 32-byte Folded Reload vbroadcastss (%rsi,%r10,4), %ymm0 vsubps %ymm11, %ymm0, %ymm0 vmulps %ymm3, %ymm3, %ymm4 vmulps %ymm2, %ymm2, %ymm5 vaddps %ymm5, %ymm4, %ymm4 vmulps %ymm0, %ymm0, %ymm5 vaddps %ymm5, %ymm4, %ymm4 vmulss %xmm1, %xmm1, %xmm5 vpermilps $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0] vinsertf128 $1, %xmm5, %ymm5, %ymm5 cmpl $255, %ebp jne LBB8_278 ## BB#272: ## %cif_mask_all1885 ## in Loop: Header=BB8_271 Depth=2 vcmpnleps %ymm4, %ymm5, %ymm6 vmovmskps %ymm6, %eax testl %eax, %eax je LBB8_279 ## BB#273: ## %cif_mask_all1885 ## in Loop: Header=BB8_271 Depth=2 cmpl $255, %eax jne LBB8_283 ## BB#274: ## %cif_test_all1894 ## in Loop: Header=BB8_271 Depth=2 vsqrtps %ymm4, %ymm6 vrcpps %ymm6, %ymm4 vmulps %ymm4, %ymm6, %ymm5 vmovaps LCPI8_20(%rip), %ymm7 ## ymm7 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm5, %ymm7, %ymm5 vmulps %ymm5, %ymm4, %ymm7 vmulps %ymm7, %ymm3, %ymm5 vmulps %ymm7, %ymm2, %ymm4 vmulps %ymm7, %ymm0, %ymm3 vmovups 192(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm5, %ymm9, %ymm0 vmulps %ymm4, %ymm14, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 384(%rsp), %ymm12 ## 32-byte Reload vmulps %ymm3, %ymm12, %ymm2 vaddps %ymm0, %ymm2, %ymm0 vcmpnleps LCPI8_55(%rip), %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB8_279 ## BB#275: ## %cif_test_all1894 ## in Loop: Header=BB8_271 Depth=2 cmpl $255, %eax jne LBB8_280 ## BB#276: ## %cif_test_all1936 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm2 vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm2, %ymm1, %ymm10 vsubps 416(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm5, 
%ymm6, %ymm5 jmp LBB8_277 .p2align 4, 0x90 LBB8_278: ## %cif_mask_mixed1886 ## in Loop: Header=BB8_271 Depth=2 vcmpnleps %ymm4, %ymm5, %ymm5 vandps 608(%rsp), %ymm5, %ymm7 ## 32-byte Folded Reload vmovmskps %ymm7, %eax testl %eax, %eax je LBB8_279 ## BB#286: ## %safe_if_run_true2757 ## in Loop: Header=BB8_271 Depth=2 vsqrtps %ymm4, %ymm6 vrcpps %ymm6, %ymm4 vmulps %ymm4, %ymm6, %ymm5 vmovaps LCPI8_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm5, %ymm8, %ymm5 vmulps %ymm5, %ymm4, %ymm8 vmulps %ymm8, %ymm3, %ymm4 vblendvps %ymm7, %ymm4, %ymm3, %ymm5 vmulps %ymm8, %ymm2, %ymm3 vblendvps %ymm7, %ymm3, %ymm2, %ymm4 vmulps %ymm8, %ymm0, %ymm2 vblendvps %ymm7, %ymm2, %ymm0, %ymm3 vmovups 192(%rsp), %ymm9 ## 32-byte Reload vmulps %ymm5, %ymm9, %ymm0 vmulps %ymm4, %ymm14, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 384(%rsp), %ymm12 ## 32-byte Reload vmulps %ymm3, %ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 cmpl $255, %eax jne LBB8_290 ## BB#287: ## %cif_mask_all2797 ## in Loop: Header=BB8_271 Depth=2 vcmpnleps LCPI8_55(%rip), %ymm0, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB8_279 ## BB#288: ## %cif_mask_all2797 ## in Loop: Header=BB8_271 Depth=2 cmpl $255, %eax jne LBB8_292 ## BB#289: ## %cif_test_all2806 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm2 vpermilps $0, %xmm2, %xmm2 ## xmm2 = xmm2[0,0,0,0] vinsertf128 $1, %xmm2, %ymm2, %ymm2 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm2, %ymm1, %ymm10 vsubps 416(%rsp), %ymm5, %ymm2 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm2, %ymm2, %ymm5 vmulps %ymm4, %ymm4, %ymm6 vaddps %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm3, %ymm6 vaddps %ymm6, %ymm5, %ymm5 LBB8_277: ## %cif_done1887 ## in Loop: Header=BB8_271 Depth=2 vrsqrtps %ymm5, %ymm6 vmulps %ymm6, %ymm5, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI8_7(%rip), %ymm7 ## ymm7 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm5, %ymm7, %ymm5 vmulps %ymm5, %ymm6, %ymm5 vmovaps LCPI8_5(%rip), %ymm6 ## ymm6 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm6, %ymm7 vmulps %ymm7, %ymm5, %ymm5 vmulps %ymm5, %ymm2, %ymm2 vmulps %ymm5, %ymm4, %ymm4 vmulps %ymm5, %ymm3, %ymm3 vmulps %ymm2, %ymm9, %ymm2 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm2, %ymm2 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vxorps %ymm8, %ymm8, %ymm8 vmaxps %ymm8, %ymm2, %ymm4 vcmpnleps %ymm4, %ymm8, %ymm2 vcmpnltps %ymm4, %ymm8, %ymm3 vmovaps LCPI8_22(%rip), %ymm9 ## ymm9 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm3, %ymm9, %ymm4, %ymm4 vpsrad $23, %xmm4, %xmm5 vextractf128 $1, %ymm4, %xmm6 vpsrad $23, %xmm6, %xmm6 vmovdqa LCPI8_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm6, %xmm6 vpaddd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vandps LCPI8_24(%rip), %ymm4, %ymm4 vorps %ymm7, %ymm4, %ymm4 vsubps %ymm4, %ymm9, %ymm4 vmulps LCPI8_25(%rip), %ymm4, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 
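## NOTE: this block (continuing below) looks like a software pow(): vpsrad $23
## plus the -126 bias splits off the float exponent, the LCPI8_25..LCPI8_33
## Horner chain approximates log of the mantissa, ln(2) = 6.931472e-01 folds
## the exponent back in, and the following exp() rebuilds 2^n with vpslld $23
## after scaling by log2(e) = 1.442695e+00 -- presumably the specular term.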
vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm4, %ymm6 vaddps %ymm9, %ymm6, %ymm6 vsubps %ymm4, %ymm8, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vcvtdq2ps %ymm5, %ymm5 vmulps LCPI8_34(%rip), %ymm5, %ymm5 vaddps %ymm4, %ymm5, %ymm4 vmovaps LCPI8_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm2, LCPI8_36(%rip), %ymm1, %ymm2 vblendvps %ymm3, %ymm2, %ymm4, %ymm2 vmulps %ymm2, %ymm13, %ymm2 vmulps LCPI8_37(%rip), %ymm2, %ymm3 vroundps $9, %ymm3, %ymm3 vcvttps2dq %ymm3, %ymm4 vmulps LCPI8_38(%rip), %ymm3, %ymm5 vsubps %ymm5, %ymm2, %ymm2 vmulps LCPI8_39(%rip), %ymm3, %ymm3 vsubps %ymm3, %ymm2, %ymm2 vmulps LCPI8_40(%rip), %ymm2, %ymm3 vaddps LCPI8_41(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_42(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_43(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_44(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_45(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm9, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm2 vmovdqa LCPI8_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm4, %xmm3 vextractf128 $1, %ymm4, %xmm5 vpaddd %xmm1, %xmm5, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vpcmpgtd %xmm1, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovdqa LCPI8_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm6, %xmm1, %xmm5 vpcmpgtd %xmm3, %xmm1, %xmm7 vinsertf128 $1, %xmm5, %ymm7, %ymm5 vpslld $23, %xmm3, %xmm3 vpslld $23, %xmm6, %xmm6 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vaddps %ymm9, %ymm2, %ymm2 vmulps %ymm2, %ymm3, %ymm2 vblendvps %ymm4, LCPI8_48(%rip), %ymm2, %ymm2 vblendvps %ymm5, %ymm8, %ymm2, %ymm2 vminps %ymm9, %ymm10, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 96(%r11), %rax vmulps 256(%rsp), %ymm2, %ymm1 ## 32-byte Folded Reload vaddps %ymm9, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r11), %rdx movq 112(%r11), %rdi vbroadcastss (%rax,%r10,4), %ymm1 vmulps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r10,4), %ymm1 vmulps 320(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -64(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm2 vmovups %ymm2, -64(%rsp) ## 32-byte Spill vbroadcastss (%rdi,%r10,4), %ymm1 vmulps 288(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -32(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm1 jmp LBB8_282 LBB8_283: ## %cif_test_mixed2318 ## in Loop: Header=BB8_271 Depth=2 vmovups %ymm15, 576(%rsp) ## 32-byte Spill vmovaps %ymm13, %ymm15 vsqrtps %ymm4, %ymm4 vrcpps %ymm4, %ymm5 vmulps %ymm5, %ymm4, %ymm7 vmovaps LCPI8_20(%rip), %ymm8 ## ymm8 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00] vsubps %ymm7, %ymm8, %ymm7 vmulps %ymm7, %ymm5, %ymm7 vmulps %ymm7, %ymm3, %ymm5 vblendvps %ymm6, %ymm5, %ymm3, %ymm3 vmulps %ymm7, %ymm2, %ymm5 vblendvps %ymm6, %ymm5, %ymm2, %ymm5 vmulps %ymm7, %ymm0, %ymm2 vblendvps %ymm6, %ymm2, %ymm0, %ymm7 vmovaps %ymm14, %ymm12 vmovups 192(%rsp), %ymm14 ## 32-byte Reload vmulps %ymm3, %ymm14, %ymm0 vmulps %ymm5, %ymm12, %ymm2 vaddps %ymm2, %ymm0, %ymm0 vmovups 384(%rsp), %ymm13 ## 32-byte Reload vmulps %ymm7, %ymm13, %ymm2 vaddps %ymm2, %ymm0, %ymm2 vxorps %ymm9, 
%ymm9, %ymm9 vcmpnleps %ymm9, %ymm2, %ymm0 vblendvps %ymm6, %ymm0, %ymm9, %ymm0 vmovmskps %ymm0, %eax testl %eax, %eax je LBB8_284 ## BB#285: ## %safe_if_run_true2619 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm6 vpermilps $0, %xmm6, %xmm6 ## xmm6 = xmm6[0,0,0,0] vinsertf128 $1, %xmm6, %ymm6, %ymm6 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm4, %ymm1, %ymm1 vdivps %ymm6, %ymm1, %ymm11 vsubps 416(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm5, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm7, %ymm5 ## 32-byte Folded Reload vmulps %ymm3, %ymm3, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm5, %ymm5, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm3, %ymm7 vblendvps %ymm0, %ymm7, %ymm3, %ymm3 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm0, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm5, %ymm6 vblendvps %ymm0, %ymm6, %ymm5, %ymm5 vmulps %ymm3, %ymm14, %ymm3 vmulps %ymm4, %ymm12, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm5, %ymm13, %ymm4 vaddps %ymm4, %ymm3, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI8_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI8_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1728(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm0, %ymm6, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1760(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm0, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1760(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI8_25(%rip), %ymm5, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1728(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm6 vmulps LCPI8_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI8_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI8_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmovaps %ymm15, %ymm13 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI8_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI8_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI8_39(%rip), 
%ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI8_40(%rip), %ymm3, %ymm4 vaddps LCPI8_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI8_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI8_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI8_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 128(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm2, %ymm1 movq 96(%r11), %rax vmulps 256(%rsp), %ymm3, %ymm2 ## 32-byte Folded Reload vaddps %ymm10, %ymm2, %ymm2 vmovups 160(%rsp), %ymm10 ## 32-byte Reload vmulps %ymm2, %ymm1, %ymm1 movq 104(%r11), %rdx movq 112(%r11), %rdi vbroadcastss (%rax,%r10,4), %ymm2 vmulps 352(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups 576(%rsp), %ymm15 ## 32-byte Reload vaddps %ymm2, %ymm15, %ymm2 vblendvps %ymm0, %ymm2, %ymm15, %ymm15 vbroadcastss (%rdx,%r10,4), %ymm2 vmulps 320(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm2 vmovups -64(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm2, %ymm3, %ymm2 vblendvps %ymm0, %ymm2, %ymm3, %ymm3 vmovups %ymm3, -64(%rsp) ## 32-byte Spill vbroadcastss (%rdi,%r10,4), %ymm2 vmulps 288(%rsp), %ymm2, %ymm2 ## 32-byte Folded Reload vmulps %ymm2, %ymm1, %ymm1 vmovups -32(%rsp), %ymm2 ## 32-byte Reload vaddps %ymm1, %ymm2, %ymm1 vblendvps %ymm0, %ymm1, %ymm2, %ymm2 vmovups %ymm2, -32(%rsp) ## 32-byte Spill vmovaps %ymm12, %ymm14 jmp LBB8_279 LBB8_290: ## %cif_mask_mixed2798 ## in Loop: Header=BB8_271 Depth=2 vxorps %ymm9, %ymm9, %ymm9 vcmpnleps %ymm9, %ymm0, %ymm2 vblendvps %ymm2, %ymm7, %ymm9, %ymm2 vmovmskps %ymm2, %eax testl %eax, %eax je LBB8_279 ## BB#291: ## %safe_if_run_true3055 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 416(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, 
%ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps 192(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI8_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI8_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1664(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1696(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1696(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI8_25(%rip), %ymm5, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1664(%rsp) ## 32-byte Spill vcvtdq2ps %ymm7, %ymm6 vmulps LCPI8_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI8_35(%rip), %ymm1 ## ymm1 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI8_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI8_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI8_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI8_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI8_40(%rip), %ymm3, %ymm4 vaddps LCPI8_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI8_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI8_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI8_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 128(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r11), %rax vmulps 256(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmovups 160(%rsp), %ymm10 ## 32-byte Reload vmulps 
%ymm1, %ymm0, %ymm0 movq 104(%r11), %rdx movq 112(%r11), %rdi vbroadcastss (%rax,%r10,4), %ymm1 vmulps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r10,4), %ymm1 vmulps 320(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -64(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -64(%rsp) ## 32-byte Spill vbroadcastss (%rdi,%r10,4), %ymm1 vmulps 288(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -32(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 vmovups %ymm1, -32(%rsp) ## 32-byte Spill jmp LBB8_279 LBB8_284: ## in Loop: Header=BB8_271 Depth=2 vmovaps %ymm12, %ymm14 vmovaps %ymm15, %ymm13 vmovups 576(%rsp), %ymm15 ## 32-byte Reload jmp LBB8_279 LBB8_280: ## %cif_test_mixed2053 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 416(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm6, %ymm7, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI8_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI8_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1600(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1632(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1632(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI8_25(%rip), %ymm5, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_30(%rip), 
%ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1600(%rsp) ## 32-byte Spill jmp LBB8_281 LBB8_292: ## %cif_test_mixed2926 ## in Loop: Header=BB8_271 Depth=2 movq 88(%r11), %rax vsubss (%rax,%r10,4), %xmm1, %xmm7 vpermilps $0, %xmm7, %xmm7 ## xmm7 = xmm7[0,0,0,0] vinsertf128 $1, %xmm7, %ymm7, %ymm7 vpermilps $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0] vinsertf128 $1, %xmm1, %ymm1, %ymm1 vsubps %ymm6, %ymm1, %ymm1 vdivps %ymm7, %ymm1, %ymm11 vsubps 416(%rsp), %ymm5, %ymm5 ## 32-byte Folded Reload vsubps 448(%rsp), %ymm4, %ymm4 ## 32-byte Folded Reload vsubps 480(%rsp), %ymm3, %ymm3 ## 32-byte Folded Reload vmulps %ymm5, %ymm5, %ymm6 vmulps %ymm4, %ymm4, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vmulps %ymm3, %ymm3, %ymm7 vaddps %ymm7, %ymm6, %ymm6 vrsqrtps %ymm6, %ymm7 vmulps %ymm7, %ymm6, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_7(%rip), %ymm8 ## ymm8 = [3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00,3.000000e+00] vsubps %ymm6, %ymm8, %ymm6 vmulps %ymm6, %ymm7, %ymm6 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vmovaps %ymm7, %ymm8 vmulps %ymm8, %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm7 vblendvps %ymm2, %ymm7, %ymm5, %ymm5 vmulps %ymm6, %ymm4, %ymm7 vblendvps %ymm2, %ymm7, %ymm4, %ymm4 vmulps %ymm6, %ymm3, %ymm6 vblendvps %ymm2, %ymm6, %ymm3, %ymm3 vmulps %ymm5, %ymm9, %ymm5 vmulps %ymm4, %ymm14, %ymm4 vaddps %ymm4, %ymm5, %ymm4 vmulps %ymm3, %ymm12, %ymm3 vaddps %ymm3, %ymm4, %ymm3 vxorps %ymm9, %ymm9, %ymm9 vmaxps %ymm9, %ymm3, %ymm5 vcmpnleps %ymm5, %ymm9, %ymm3 vcmpnltps %ymm5, %ymm9, %ymm4 vmovaps LCPI8_22(%rip), %ymm10 ## ymm10 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vblendvps %ymm4, %ymm10, %ymm5, %ymm5 vpsrad $23, %xmm5, %xmm6 vextractf128 $1, %ymm5, %xmm7 vpsrad $23, %xmm7, %xmm7 vmovdqa LCPI8_23(%rip), %xmm1 ## xmm1 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm1, %xmm7, %xmm7 vpaddd %xmm1, %xmm6, %xmm6 vinsertf128 $1, %xmm7, %ymm6, %ymm6 vmovups 1536(%rsp), %ymm7 ## 32-byte Reload vblendvps %ymm2, %ymm6, %ymm7, %ymm7 vandps LCPI8_24(%rip), %ymm5, %ymm5 vorps %ymm8, %ymm5, %ymm5 vmovups 1568(%rsp), %ymm1 ## 32-byte Reload vblendvps %ymm2, %ymm5, %ymm1, %ymm1 vmovups %ymm1, 1568(%rsp) ## 32-byte Spill vsubps %ymm1, %ymm10, %ymm5 vmulps LCPI8_25(%rip), %ymm5, %ymm6 vaddps LCPI8_26(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_27(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_28(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_29(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_30(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_31(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_32(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps LCPI8_33(%rip), %ymm6, %ymm6 vmulps %ymm6, %ymm5, %ymm6 vaddps %ymm10, %ymm6, %ymm6 vsubps %ymm5, %ymm9, %ymm5 vmulps %ymm6, %ymm5, %ymm5 vmovups %ymm7, 1536(%rsp) ## 32-byte Spill LBB8_281: ## %cif_done1887 ## in Loop: Header=BB8_271 Depth=2 vcvtdq2ps %ymm7, %ymm6 vmulps LCPI8_34(%rip), %ymm6, %ymm6 vaddps %ymm5, %ymm6, %ymm5 vmovaps LCPI8_35(%rip), %ymm1 ## ymm1 = 
[-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm3, LCPI8_36(%rip), %ymm1, %ymm3 vblendvps %ymm4, %ymm3, %ymm5, %ymm3 vmulps %ymm3, %ymm13, %ymm3 vmulps LCPI8_37(%rip), %ymm3, %ymm4 vroundps $9, %ymm4, %ymm4 vcvttps2dq %ymm4, %ymm5 vmulps LCPI8_38(%rip), %ymm4, %ymm6 vsubps %ymm6, %ymm3, %ymm3 vmulps LCPI8_39(%rip), %ymm4, %ymm4 vsubps %ymm4, %ymm3, %ymm3 vmulps LCPI8_40(%rip), %ymm3, %ymm4 vaddps LCPI8_41(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_42(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_43(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_44(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_45(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm10, %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm3 vmovdqa LCPI8_46(%rip), %xmm1 ## xmm1 = [127,127,127,127] vpaddd %xmm1, %xmm5, %xmm4 vextractf128 $1, %ymm5, %xmm6 vpaddd %xmm1, %xmm6, %xmm7 vpcmpgtd %xmm1, %xmm6, %xmm6 vpcmpgtd %xmm1, %xmm5, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm5 vmovdqa LCPI8_47(%rip), %xmm1 ## xmm1 = [1,1,1,1] vpcmpgtd %xmm7, %xmm1, %xmm6 vpcmpgtd %xmm4, %xmm1, %xmm8 vinsertf128 $1, %xmm6, %ymm8, %ymm6 vpslld $23, %xmm4, %xmm4 vpslld $23, %xmm7, %xmm7 vinsertf128 $1, %xmm7, %ymm4, %ymm4 vaddps %ymm10, %ymm3, %ymm3 vmulps %ymm3, %ymm4, %ymm3 vblendvps %ymm5, LCPI8_48(%rip), %ymm3, %ymm3 vblendvps %ymm6, %ymm9, %ymm3, %ymm3 vminps %ymm10, %ymm11, %ymm1 vmovups 128(%rsp), %ymm11 ## 32-byte Reload vmulps %ymm1, %ymm0, %ymm0 movq 96(%r11), %rax vmulps 256(%rsp), %ymm3, %ymm1 ## 32-byte Folded Reload vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 movq 104(%r11), %rdx movq 112(%r11), %rdi vbroadcastss (%rax,%r10,4), %ymm1 vmulps 352(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm1, %ymm15, %ymm1 vblendvps %ymm2, %ymm1, %ymm15, %ymm15 vbroadcastss (%rdx,%r10,4), %ymm1 vmulps 320(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm1 vmovups -64(%rsp), %ymm3 ## 32-byte Reload vaddps %ymm1, %ymm3, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm3 vmovups %ymm3, -64(%rsp) ## 32-byte Spill vbroadcastss (%rdi,%r10,4), %ymm1 vmulps 288(%rsp), %ymm1, %ymm1 ## 32-byte Folded Reload vmulps %ymm1, %ymm0, %ymm0 vmovups -32(%rsp), %ymm1 ## 32-byte Reload vaddps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, %ymm0, %ymm1, %ymm1 LBB8_282: ## %cif_done1887 ## in Loop: Header=BB8_271 Depth=2 vmovups %ymm1, -32(%rsp) ## 32-byte Spill vmovups 160(%rsp), %ymm10 ## 32-byte Reload LBB8_279: ## %cif_done1887 ## in Loop: Header=BB8_271 Depth=2 addq $4, %r13 decl %r12d jne LBB8_271 jmp LBB8_223 LBB8_222: ## in Loop: Header=BB8_61 Depth=1 vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill movq %r14, %rbx vxorps %ymm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vxorps %ymm15, %ymm15, %ymm15 LBB8_223: ## %for_exit1816 ## in Loop: Header=BB8_61 Depth=1 movq %rbp, %r14 addl 1152(%rsp), %r15d ## 4-byte Folded Reload vxorps %ymm0, %ymm0, %ymm0 vmaxps %ymm0, %ymm15, %ymm0 vxorps %ymm9, %ymm9, %ymm9 vmovaps LCPI8_22(%rip), %ymm11 ## ymm11 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] vminps %ymm11, %ymm0, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm0 vcmpnltps %ymm2, %ymm9, %ymm1 vblendvps %ymm1, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm4 vpsrad $23, %xmm4, %xmm4 vmovdqa LCPI8_23(%rip), %xmm12 ## xmm12 = [4294967170,4294967170,4294967170,4294967170] vpaddd %xmm12, %xmm4, %xmm4 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm4, %ymm3, %ymm3 
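## NOTE: past the light loop (for_exit1816), each accumulated color channel
## appears to be clamped to [0,1] (vmaxps with 0, vminps with 1) and run
## through the same log/exp pair scaled by 4.545454e-01 = 1/2.2, i.e. a
## gamma-correction pow(x, 1/2.2) applied before quantization.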
vmovups 1344(%rsp), %ymm5 ## 32-byte Reload vmovups 608(%rsp), %ymm15 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm5, %ymm5 vmovaps LCPI8_24(%rip), %ymm3 ## ymm3 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm3, %ymm2, %ymm2 vmovaps LCPI8_5(%rip), %ymm7 ## ymm7 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm7, %ymm2, %ymm2 vmovups 1376(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vmovups %ymm3, 1376(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmovaps LCPI8_25(%rip), %ymm3 ## ymm3 = [1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00,1.749101e+00] vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_26(%rip), %ymm4 ## ymm4 = [-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00,-2.489927e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_27(%rip), %ymm4 ## ymm4 = [1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00,1.984423e+00] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_28(%rip), %ymm4 ## ymm4 = [-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01,-5.996323e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_29(%rip), %ymm4 ## ymm4 = [3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01,3.424419e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_30(%rip), %ymm4 ## ymm4 = [1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01,1.754176e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_31(%rip), %ymm4 ## ymm4 = [2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01,2.519190e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_32(%rip), %ymm4 ## ymm4 = [3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01,3.332604e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vmovaps LCPI8_33(%rip), %ymm4 ## ymm4 = [5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01,5.000010e-01] vaddps %ymm4, %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm9, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm5, 1344(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm3 vmovaps LCPI8_34(%rip), %ymm4 ## ymm4 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm4, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI8_35(%rip), %ymm3 ## ymm3 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vmovaps LCPI8_36(%rip), %ymm7 ## ymm7 = [nan,nan,nan,nan,nan,nan,nan,nan] vblendvps %ymm0, %ymm7, %ymm3, %ymm0 vblendvps %ymm1, %ymm0, %ymm2, %ymm0 vmovaps LCPI8_49(%rip), %ymm1 ## ymm1 = [4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01,4.545454e-01] vmulps %ymm1, %ymm0, %ymm0 vmovaps LCPI8_37(%rip), %ymm1 ## ymm1 = [1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00,1.442695e+00] vmulps %ymm1, %ymm0, %ymm1 vroundps $9, %ymm1, %ymm1 vcvttps2dq %ymm1, %ymm2 vmovaps LCPI8_38(%rip), %ymm3 ## ymm3 = [6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01,6.931458e-01] vmulps %ymm3, %ymm1, %ymm3 vsubps 
%ymm3, %ymm0, %ymm0 vmovaps LCPI8_39(%rip), %ymm3 ## ymm3 = [1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06,1.428607e-06] vmulps %ymm3, %ymm1, %ymm1 vsubps %ymm1, %ymm0, %ymm0 vmovaps LCPI8_40(%rip), %ymm1 ## ymm1 = [2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04,2.755538e-04] vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_41(%rip), %ymm3 ## ymm3 = [1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03,1.304379e-03] vaddps %ymm3, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_42(%rip), %ymm6 ## ymm6 = [8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03,8.378831e-03] vaddps %ymm6, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_43(%rip), %ymm8 ## ymm8 = [4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02,4.165391e-02] vaddps %ymm8, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_44(%rip), %ymm10 ## ymm10 = [1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01,1.666684e-01] vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vmovaps LCPI8_45(%rip), %ymm13 ## ymm13 = [4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01,4.999999e-01] vaddps %ymm13, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm1 vaddps %ymm11, %ymm1, %ymm1 vmulps %ymm1, %ymm0, %ymm0 vaddps %ymm11, %ymm0, %ymm0 vmovdqa LCPI8_46(%rip), %xmm13 ## xmm13 = [127,127,127,127] vpaddd %xmm13, %xmm2, %xmm1 vextractf128 $1, %ymm2, %xmm3 vpaddd %xmm13, %xmm3, %xmm4 vpcmpgtd %xmm13, %xmm3, %xmm3 vpcmpgtd %xmm13, %xmm2, %xmm2 vinsertf128 $1, %xmm3, %ymm2, %ymm2 vmovdqa LCPI8_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm4, %xmm6, %xmm3 vpcmpgtd %xmm1, %xmm6, %xmm5 vinsertf128 $1, %xmm3, %ymm5, %ymm3 vpslld $23, %xmm1, %xmm1 vpslld $23, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm1, %ymm1 vmulps %ymm0, %ymm1, %ymm0 vblendvps %ymm2, LCPI8_48(%rip), %ymm0, %ymm0 vblendvps %ymm3, %ymm9, %ymm0, %ymm0 vmovups %ymm0, 64(%rsp) ## 32-byte Spill vmovups -64(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm3 vcmpnleps %ymm3, %ymm9, %ymm1 vcmpnltps %ymm3, %ymm9, %ymm2 vblendvps %ymm2, %ymm11, %ymm3, %ymm3 vpsrad $23, %xmm3, %xmm4 vextractf128 $1, %ymm3, %xmm5 vpsrad $23, %xmm5, %xmm5 vpaddd %xmm12, %xmm5, %xmm5 vpaddd %xmm12, %xmm4, %xmm4 vinsertf128 $1, %xmm5, %ymm4, %ymm4 vmovups 1408(%rsp), %ymm5 ## 32-byte Reload vblendvps %ymm15, %ymm4, %ymm5, %ymm5 vmovaps LCPI8_24(%rip), %ymm7 ## ymm7 = [2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255,2155872255] vandps %ymm7, %ymm3, %ymm3 vmovaps LCPI8_5(%rip), %ymm0 ## ymm0 = [5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01,5.000000e-01] vorps %ymm0, %ymm3, %ymm3 vmovups 1440(%rsp), %ymm4 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm4, %ymm4 vmovups %ymm4, 1440(%rsp) ## 32-byte Spill vsubps %ymm4, %ymm11, %ymm3 vmulps LCPI8_25(%rip), %ymm3, %ymm4 vaddps LCPI8_26(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_27(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_28(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_29(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_30(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_31(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_32(%rip), %ymm4, %ymm4 vmulps %ymm4, %ymm3, %ymm4 vaddps LCPI8_33(%rip), %ymm4, %ymm4 
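## ----------------------------------------------------------------
## NOTE (editorial annotation, inferred): the vpsrad $23 /
## 0x807FFFFF-mask / or-with-0.5 pattern above is the matching
## vectorized log() half of pow(x, y) = exp(y * log(x)); combined
## with the 4.545454e-01 = 1/2.2 factor (LCPI8_49) the whole block
## appears to compute gamma encoding, pow(clamp(v, 0, 1), 1/2.2).
## Sketch (helper names ours, hypothetical):
##   int   n = (float_bits(x) >> 23) - 126;   // true exponent + 1
##   float m = bits_to_float((float_bits(x) & 0x807FFFFF)
##                           | 0x3F000000);   // mantissa in [0.5, 1)
##   float u = 1.0f - m;
##   float P = u-polynomial over LCPI8_25 .. LCPI8_33;
##   float lg = -u * (1.0f + u * P) + n * 0.6931472f;   // log(x)
##   // x == 0 -> -Inf (LCPI8_35); x < 0 -> NaN (LCPI8_36) via blends
## ----------------------------------------------------------------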
vmulps %ymm4, %ymm3, %ymm4 vaddps %ymm11, %ymm4, %ymm4 vsubps %ymm3, %ymm9, %ymm3 vmulps %ymm4, %ymm3, %ymm3 vmovups %ymm5, 1408(%rsp) ## 32-byte Spill vcvtdq2ps %ymm5, %ymm4 vmovaps LCPI8_34(%rip), %ymm8 ## ymm8 = [6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01,6.931472e-01] vmulps %ymm8, %ymm4, %ymm4 vaddps %ymm3, %ymm4, %ymm3 vmovaps LCPI8_35(%rip), %ymm4 ## ymm4 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm4, %ymm1 vblendvps %ymm2, %ymm1, %ymm3, %ymm1 vmulps LCPI8_49(%rip), %ymm1, %ymm1 vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm14 vmulps LCPI8_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm5 vmulps LCPI8_40(%rip), %ymm5, %ymm1 vaddps LCPI8_41(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI8_42(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI8_43(%rip), %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps %ymm10, %ymm1, %ymm1 vmulps %ymm1, %ymm5, %ymm1 vaddps LCPI8_45(%rip), %ymm1, %ymm10 vpaddd %xmm13, %xmm14, %xmm1 vpcmpgtd %xmm13, %xmm14, %xmm2 vmovdqu %ymm2, 192(%rsp) ## 32-byte Spill vpcmpgtd %xmm1, %xmm6, %xmm2 vmovdqu %ymm2, -64(%rsp) ## 32-byte Spill vpslld $23, %xmm1, %xmm1 vmovdqu %ymm1, 384(%rsp) ## 32-byte Spill vmovups -32(%rsp), %ymm1 ## 32-byte Reload vmaxps %ymm9, %ymm1, %ymm1 vminps %ymm11, %ymm1, %ymm2 vcmpnleps %ymm2, %ymm9, %ymm1 vcmpnltps %ymm2, %ymm9, %ymm4 vblendvps %ymm4, %ymm11, %ymm2, %ymm2 vpsrad $23, %xmm2, %xmm3 vextractf128 $1, %ymm2, %xmm6 vpsrad $23, %xmm6, %xmm6 vpaddd %xmm12, %xmm6, %xmm6 vpaddd %xmm12, %xmm3, %xmm3 vinsertf128 $1, %xmm6, %ymm3, %ymm3 vmovups 1472(%rsp), %ymm6 ## 32-byte Reload vblendvps %ymm15, %ymm3, %ymm6, %ymm6 vandps %ymm7, %ymm2, %ymm2 vorps %ymm0, %ymm2, %ymm2 vmovups 1504(%rsp), %ymm3 ## 32-byte Reload vblendvps %ymm15, %ymm2, %ymm3, %ymm3 vxorps %ymm15, %ymm15, %ymm15 vmovups %ymm3, 1504(%rsp) ## 32-byte Spill vsubps %ymm3, %ymm11, %ymm2 vmulps LCPI8_25(%rip), %ymm2, %ymm3 vaddps LCPI8_26(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_27(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_28(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_29(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_30(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_31(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_32(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps LCPI8_33(%rip), %ymm3, %ymm3 vmulps %ymm3, %ymm2, %ymm3 vaddps %ymm11, %ymm3, %ymm3 vsubps %ymm2, %ymm15, %ymm2 vmulps %ymm3, %ymm2, %ymm2 vmovups %ymm6, 1472(%rsp) ## 32-byte Spill vcvtdq2ps %ymm6, %ymm3 vmulps %ymm8, %ymm3, %ymm3 vaddps %ymm2, %ymm3, %ymm2 vmovaps LCPI8_35(%rip), %ymm0 ## ymm0 = [-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf] vblendvps %ymm1, LCPI8_36(%rip), %ymm0, %ymm1 vblendvps %ymm4, %ymm1, %ymm2, %ymm1 vmulps LCPI8_49(%rip), %ymm1, %ymm1 vmulps LCPI8_37(%rip), %ymm1, %ymm2 vroundps $9, %ymm2, %ymm2 vcvttps2dq %ymm2, %ymm7 vmulps LCPI8_38(%rip), %ymm2, %ymm3 vsubps %ymm3, %ymm1, %ymm1 vmulps LCPI8_39(%rip), %ymm2, %ymm2 vsubps %ymm2, %ymm1, %ymm4 vmulps LCPI8_40(%rip), %ymm4, %ymm1 vaddps LCPI8_41(%rip), %ymm1, %ymm2 vpaddd %xmm13, %xmm7, %xmm3 vpcmpgtd %xmm13, %xmm7, %xmm11 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vmulps LCPI8_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI8_51(%rip), %xmm6 ## xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm6, %xmm1, 
%xmm1 vpshufb %xmm6, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm1 ## xmm1 = xmm0[0],xmm1[0] vmovdqa LCPI8_47(%rip), %xmm0 ## xmm0 = [1,1,1,1] vpcmpgtd %xmm3, %xmm0, %xmm12 vpslld $23, %xmm3, %xmm13 movslq %r15d, %rax testq %rbx, %rbx movq 2688(%rsp), %rsi je LBB8_225 ## BB#224: ## %pl_dolane.i13400 ## in Loop: Header=BB8_61 Depth=1 vpextrb $0, %xmm1, (%rsi,%rax) LBB8_225: ## %pl_loopend.i13403 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm2, %ymm4, %ymm3 vmulps %ymm10, %ymm5, %ymm0 vextractf128 $1, %ymm14, %xmm2 movq 672(%rsp), %rdi ## 8-byte Reload testq %rdi, %rdi movl -68(%rsp), %edx ## 4-byte Reload movq 2704(%rsp), %rcx movq %rcx, %r10 movl 2672(%rsp), %r8d movq 704(%rsp), %rcx ## 8-byte Reload movq 1280(%rsp), %rbp ## 8-byte Reload movq 1248(%rsp), %r15 ## 8-byte Reload movq 1216(%rsp), %r12 ## 8-byte Reload movq 1184(%rsp), %r13 ## 8-byte Reload je LBB8_227 ## BB#226: ## %pl_dolane.1.i13406 ## in Loop: Header=BB8_61 Depth=1 vpextrb $2, %xmm1, 1(%rsi,%rax) LBB8_227: ## %pl_loopend.1.i13409 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_42(%rip), %ymm3, %ymm9 vaddps LCPI8_22(%rip), %ymm0, %ymm3 vpaddd LCPI8_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx vmovaps 544(%rsp), %xmm10 ## 16-byte Reload vmovups 416(%rsp), %ymm14 ## 32-byte Reload je LBB8_229 ## BB#228: ## %pl_dolane.2.i13412 ## in Loop: Header=BB8_61 Depth=1 vpextrb $4, %xmm1, 2(%rsi,%rax) LBB8_229: ## %pl_loopend.2.i13415 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm9, %ymm4, %ymm9 vmulps %ymm3, %ymm5, %ymm5 vpslld $23, %xmm0, %xmm3 testq %rbp, %rbp je LBB8_231 ## BB#230: ## %pl_dolane.3.i13418 ## in Loop: Header=BB8_61 Depth=1 vpextrb $6, %xmm1, 3(%rsi,%rax) LBB8_231: ## %pl_loopend.3.i13421 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_43(%rip), %ymm9, %ymm9 vaddps LCPI8_22(%rip), %ymm5, %ymm5 vpcmpgtd LCPI8_46(%rip), %xmm2, %xmm2 vmovups 384(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm3, %ymm6, %ymm3 testq %r15, %r15 je LBB8_233 ## BB#232: ## %pl_dolane.4.i13424 ## in Loop: Header=BB8_61 Depth=1 vpextrb $8, %xmm1, 4(%rsi,%rax) LBB8_233: ## %pl_loopend.4.i13427 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm9, %ymm4, %ymm8 vmovups 192(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm2, %ymm6, %ymm2 vmovdqa LCPI8_47(%rip), %xmm6 ## xmm6 = [1,1,1,1] vpcmpgtd %xmm0, %xmm6, %xmm0 vmulps %ymm5, %ymm3, %ymm3 testq %r12, %r12 je LBB8_235 ## BB#234: ## %pl_dolane.5.i13430 ## in Loop: Header=BB8_61 Depth=1 vpextrb $10, %xmm1, 5(%rsi,%rax) LBB8_235: ## %pl_loopend.5.i13433 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_44(%rip), %ymm8, %ymm5 vmovups -64(%rsp), %ymm6 ## 32-byte Reload vinsertf128 $1, %xmm0, %ymm6, %ymm0 vblendvps %ymm2, LCPI8_48(%rip), %ymm3, %ymm2 testq %r13, %r13 je LBB8_237 ## BB#236: ## %pl_dolane.6.i13436 ## in Loop: Header=BB8_61 Depth=1 vpextrb $12, %xmm1, 6(%rsi,%rax) LBB8_237: ## %pl_loopend.6.i13438 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm5, %ymm4, %ymm3 vblendvps %ymm0, %ymm15, %ymm2, %ymm2 testb %r14b, %r14b vmovdqa 560(%rsp), %xmm6 ## 16-byte Reload jns LBB8_239 ## BB#238: ## %pl_dolane.7.i13441 ## in Loop: Header=BB8_61 Depth=1 vpextrb $14, %xmm1, 7(%rsi,%rax) LBB8_239: ## %__masked_store_i8.exit13442 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_45(%rip), %ymm3, %ymm0 vmulps LCPI8_50(%rip), %ymm2, %ymm1 vcvttps2dq %ymm1, %ymm1 vextractf128 $1, %ymm1, %xmm2 vmovdqa LCPI8_51(%rip), %xmm3 ## xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm3, %xmm2, %xmm2 vpshufb %xmm3, %xmm1, %xmm1 vpunpcklqdq %xmm2, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm2[0] testq %rbx, %rbx je LBB8_241 ## BB#240: ## 
%pl_dolane.i13350 ## in Loop: Header=BB8_61 Depth=1 vpextrb $0, %xmm1, (%r9,%rax) LBB8_241: ## %pl_loopend.i13353 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm0, %ymm4, %ymm0 vextractf128 $1, %ymm7, %xmm2 testq %rdi, %rdi je LBB8_243 ## BB#242: ## %pl_dolane.1.i13356 ## in Loop: Header=BB8_61 Depth=1 vpextrb $2, %xmm1, 1(%r9,%rax) LBB8_243: ## %pl_loopend.1.i13359 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_22(%rip), %ymm0, %ymm3 vpaddd LCPI8_46(%rip), %xmm2, %xmm0 testq %rcx, %rcx vmovaps 528(%rsp), %xmm7 ## 16-byte Reload je LBB8_245 ## BB#244: ## %pl_dolane.2.i13362 ## in Loop: Header=BB8_61 Depth=1 vpextrb $4, %xmm1, 2(%r9,%rax) LBB8_245: ## %pl_loopend.2.i13365 ## in Loop: Header=BB8_61 Depth=1 vmulps %ymm3, %ymm4, %ymm3 vpslld $23, %xmm0, %xmm4 testq %rbp, %rbp je LBB8_247 ## BB#246: ## %pl_dolane.3.i13368 ## in Loop: Header=BB8_61 Depth=1 vpextrb $6, %xmm1, 3(%r9,%rax) LBB8_247: ## %pl_loopend.3.i13371 ## in Loop: Header=BB8_61 Depth=1 vaddps LCPI8_22(%rip), %ymm3, %ymm3 vpcmpgtd LCPI8_46(%rip), %xmm2, %xmm2 vinsertf128 $1, %xmm4, %ymm13, %ymm4 testq %r15, %r15 je LBB8_249 ## BB#248: ## %pl_dolane.4.i13374 ## in Loop: Header=BB8_61 Depth=1 vpextrb $8, %xmm1, 4(%r9,%rax) LBB8_249: ## %pl_loopend.4.i13377 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm2, %ymm11, %ymm2 vmovdqa LCPI8_47(%rip), %xmm5 ## xmm5 = [1,1,1,1] vpcmpgtd %xmm0, %xmm5, %xmm0 vmulps %ymm3, %ymm4, %ymm3 testq %r12, %r12 vmovups 448(%rsp), %ymm5 ## 32-byte Reload je LBB8_251 ## BB#250: ## %pl_dolane.5.i13380 ## in Loop: Header=BB8_61 Depth=1 vpextrb $10, %xmm1, 5(%r9,%rax) LBB8_251: ## %pl_loopend.5.i13383 ## in Loop: Header=BB8_61 Depth=1 vinsertf128 $1, %xmm0, %ymm12, %ymm0 vblendvps %ymm2, LCPI8_48(%rip), %ymm3, %ymm2 testq %r13, %r13 vxorps %xmm8, %xmm8, %xmm8 je LBB8_253 ## BB#252: ## %pl_dolane.6.i13386 ## in Loop: Header=BB8_61 Depth=1 vpextrb $12, %xmm1, 6(%r9,%rax) LBB8_253: ## %pl_loopend.6.i13388 ## in Loop: Header=BB8_61 Depth=1 vblendvps %ymm0, %ymm15, %ymm2, %ymm0 testb %r14b, %r14b vmovdqu 640(%rsp), %ymm3 ## 32-byte Reload vmovups 480(%rsp), %ymm4 ## 32-byte Reload jns LBB8_255 ## BB#254: ## %pl_dolane.7.i13391 ## in Loop: Header=BB8_61 Depth=1 vpextrb $14, %xmm1, 7(%r9,%rax) LBB8_255: ## %__masked_store_i8.exit13392 ## in Loop: Header=BB8_61 Depth=1 vmulps LCPI8_50(%rip), %ymm0, %ymm0 vcvttps2dq %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm1 vmovdqa LCPI8_51(%rip), %xmm2 ## xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] vpshufb %xmm2, %xmm1, %xmm1 vpshufb %xmm2, %xmm0, %xmm0 vpunpcklqdq %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0],xmm1[0] testq %rbx, %rbx je LBB8_257 ## BB#256: ## %pl_dolane.i13307 ## in Loop: Header=BB8_61 Depth=1 vpextrb $0, %xmm0, (%r10,%rax) LBB8_257: ## %pl_loopend.i13310 ## in Loop: Header=BB8_61 Depth=1 testq %rdi, %rdi je LBB8_259 ## BB#258: ## %pl_dolane.1.i13312 ## in Loop: Header=BB8_61 Depth=1 vpextrb $2, %xmm0, 1(%r10,%rax) LBB8_259: ## %pl_loopend.1.i13315 ## in Loop: Header=BB8_61 Depth=1 testq %rcx, %rcx je LBB8_261 ## BB#260: ## %pl_dolane.2.i13317 ## in Loop: Header=BB8_61 Depth=1 vpextrb $4, %xmm0, 2(%r10,%rax) LBB8_261: ## %pl_loopend.2.i13320 ## in Loop: Header=BB8_61 Depth=1 testq %rbp, %rbp je LBB8_263 ## BB#262: ## %pl_dolane.3.i13322 ## in Loop: Header=BB8_61 Depth=1 vpextrb $6, %xmm0, 3(%r10,%rax) LBB8_263: ## %pl_loopend.3.i13325 ## in Loop: Header=BB8_61 Depth=1 testq %r15, %r15 je LBB8_265 ## BB#264: ## %pl_dolane.4.i13327 ## in Loop: Header=BB8_61 Depth=1 vpextrb $8, %xmm0, 4(%r10,%rax) LBB8_265: ## %pl_loopend.4.i13330 ## in Loop: Header=BB8_61 Depth=1 
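## ----------------------------------------------------------------
## NOTE (editorial annotation): the pl_dolane/pl_loopend ladders
## above and below emulate a masked byte store -- AVX1 only has a
## masked store for 32-bit lanes (vmaskmovps), not for bytes. The
## eight 32-bit results are narrowed (vcvttps2dq, vpshufb,
## vpunpcklqdq), then each lane's execution-mask flag, held in a
## scalar register, is tested and the byte is written with vpextrb
## only when that lane is active. Roughly (sketch, names ours):
##   for (i = 0; i < 8; i++)
##       if (lane_active[i]) dst[base + i] = lanes[i];
## ----------------------------------------------------------------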
testq %r12, %r12 je LBB8_267 ## BB#266: ## %pl_dolane.5.i13332 ## in Loop: Header=BB8_61 Depth=1 vpextrb $10, %xmm0, 5(%r10,%rax) LBB8_267: ## %pl_loopend.5.i13335 ## in Loop: Header=BB8_61 Depth=1 testq %r13, %r13 je LBB8_269 ## BB#268: ## %pl_dolane.6.i13337 ## in Loop: Header=BB8_61 Depth=1 vpextrb $12, %xmm0, 6(%r10,%rax) LBB8_269: ## %pl_loopend.6.i13339 ## in Loop: Header=BB8_61 Depth=1 testb %r14b, %r14b jns LBB8_69 ## BB#68: ## %pl_dolane.7.i13341 ## in Loop: Header=BB8_61 Depth=1 vpextrb $14, %xmm0, 7(%r10,%rax) LBB8_69: ## %foreach_reset139 ## in Loop: Header=BB8_61 Depth=1 movl -108(%rsp), %edi ## 4-byte Reload LBB8_70: ## %foreach_reset139 ## in Loop: Header=BB8_61 Depth=1 incl %edx cmpl -100(%rsp), %edx ## 4-byte Folded Reload jne LBB8_61 jmp LBB8_71 LBB8_2: ## %if_then shll $2, %r8d cmpl $255, %r8d jl LBB8_4 ## BB#3: ## %if_then movb $-1, %r8b LBB8_4: ## %if_then cmpl -100(%rsp), %edx ## 4-byte Folded Reload jge LBB8_71 ## BB#5: ## %for_loop.lr.ph movl -104(%rsp), %esi ## 4-byte Reload movl %esi, %ecx subl %edi, %ecx movl %ecx, %eax sarl $31, %eax shrl $29, %eax addl %ecx, %eax andl $-8, %eax subl %eax, %ecx movl %esi, %eax subl %ecx, %eax vmovd %esi, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 movzbl %r8b, %ecx vmovd %ecx, %xmm1 vpshuflw $0, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,0,0,4,5,6,7] vpshufd $80, %xmm1, %xmm1 ## xmm1 = xmm1[0,0,1,1] movl -112(%rsp), %ecx ## 4-byte Reload imull %edx, %ecx addl %edi, %ecx vpshufb LCPI8_52(%rip), %xmm1, %xmm1 ## xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] vmovdqa LCPI8_3(%rip), %xmm2 ## xmm2 = [0,1,2,3] vmovdqa LCPI8_4(%rip), %xmm3 ## xmm3 = [4,5,6,7] vextractf128 $1, %ymm0, %xmm4 movl %edi, -108(%rsp) ## 4-byte Spill .p2align 4, 0x90 LBB8_6: ## %for_loop ## =>This Loop Header: Depth=1 ## Child Loop BB8_8 Depth 2 movl %edi, %ebp cmpl %edi, %eax jle LBB8_9 ## BB#7: ## %foreach_full_body.lr.ph ## in Loop: Header=BB8_6 Depth=1 movl %ecx, %esi movl %edi, %ebp .p2align 4, 0x90 LBB8_8: ## %foreach_full_body ## Parent Loop BB8_6 Depth=1 ## => This Inner Loop Header: Depth=2 movslq %esi, %rsi vmovq %xmm1, (%r15,%rsi) vmovq %xmm1, (%r9,%rsi) vmovq %xmm1, (%rbx,%rsi) addl $8, %ebp addl $8, %esi cmpl %eax, %ebp jl LBB8_8 LBB8_9: ## %partial_inner_all_outer ## in Loop: Header=BB8_6 Depth=1 cmpl -104(%rsp), %ebp ## 4-byte Folded Reload jge LBB8_58 ## BB#10: ## %partial_inner_only ## in Loop: Header=BB8_6 Depth=1 vmovd %ebp, %xmm5 vpshufd $0, %xmm5, %xmm5 ## xmm5 = xmm5[0,0,0,0] vpaddd %xmm2, %xmm5, %xmm6 vpaddd %xmm3, %xmm5, %xmm5 vpcmpgtd %xmm5, %xmm4, %xmm5 vpcmpgtd %xmm6, %xmm0, %xmm6 vinsertf128 $1, %xmm5, %ymm6, %ymm5 movl %edx, %esi imull -112(%rsp), %esi ## 4-byte Folded Reload addl %ebp, %esi movslq %esi, %rbp vmovmskps %ymm5, %esi movq %rsi, %rdi andq $1, %rdi je LBB8_12 ## BB#11: ## %pl_dolane.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, (%r15,%rbp) LBB8_12: ## %pl_loopend.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r10 andq $2, %r10 je LBB8_14 ## BB#13: ## %pl_dolane.1.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 1(%r15,%rbp) LBB8_14: ## %pl_loopend.1.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r11 andq $4, %r11 je LBB8_16 ## BB#15: ## %pl_dolane.2.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 2(%r15,%rbp) LBB8_16: ## %pl_loopend.2.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r14 andq $8, %r14 je LBB8_18 ## BB#17: ## %pl_dolane.3.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 3(%r15,%rbp) LBB8_18: ## %pl_loopend.3.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r12 andq $16, %r12 je 
LBB8_20 ## BB#19: ## %pl_dolane.4.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 4(%r15,%rbp) LBB8_20: ## %pl_loopend.4.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r13 andq $32, %r13 je LBB8_22 ## BB#21: ## %pl_dolane.5.i ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 5(%r15,%rbp) LBB8_22: ## %pl_loopend.5.i ## in Loop: Header=BB8_6 Depth=1 movq %rsi, %r15 andq $64, %r15 je LBB8_24 ## BB#23: ## %pl_dolane.6.i ## in Loop: Header=BB8_6 Depth=1 movq 2688(%rsp), %rbx movb %r8b, 6(%rbx,%rbp) LBB8_24: ## %pl_loopend.6.i ## in Loop: Header=BB8_6 Depth=1 testb %sil, %sil jns LBB8_26 ## BB#25: ## %pl_dolane.7.i ## in Loop: Header=BB8_6 Depth=1 movq 2688(%rsp), %rbx movb %r8b, 7(%rbx,%rbp) LBB8_26: ## %__masked_store_i8.exit ## in Loop: Header=BB8_6 Depth=1 testq %rdi, %rdi je LBB8_28 ## BB#27: ## %pl_dolane.i14804 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, (%r9,%rbp) LBB8_28: ## %pl_loopend.i14807 ## in Loop: Header=BB8_6 Depth=1 testq %r10, %r10 je LBB8_30 ## BB#29: ## %pl_dolane.1.i14809 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 1(%r9,%rbp) LBB8_30: ## %pl_loopend.1.i14812 ## in Loop: Header=BB8_6 Depth=1 testq %r11, %r11 je LBB8_32 ## BB#31: ## %pl_dolane.2.i14814 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 2(%r9,%rbp) LBB8_32: ## %pl_loopend.2.i14817 ## in Loop: Header=BB8_6 Depth=1 testq %r14, %r14 je LBB8_34 ## BB#33: ## %pl_dolane.3.i14819 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 3(%r9,%rbp) LBB8_34: ## %pl_loopend.3.i14822 ## in Loop: Header=BB8_6 Depth=1 testq %r12, %r12 je LBB8_36 ## BB#35: ## %pl_dolane.4.i14824 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 4(%r9,%rbp) LBB8_36: ## %pl_loopend.4.i14827 ## in Loop: Header=BB8_6 Depth=1 testq %r13, %r13 je LBB8_38 ## BB#37: ## %pl_dolane.5.i14829 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 5(%r9,%rbp) LBB8_38: ## %pl_loopend.5.i14832 ## in Loop: Header=BB8_6 Depth=1 testq %r15, %r15 je LBB8_40 ## BB#39: ## %pl_dolane.6.i14834 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 6(%r9,%rbp) LBB8_40: ## %pl_loopend.6.i14836 ## in Loop: Header=BB8_6 Depth=1 testb %sil, %sil jns LBB8_42 ## BB#41: ## %pl_dolane.7.i14838 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 7(%r9,%rbp) LBB8_42: ## %__masked_store_i8.exit14839 ## in Loop: Header=BB8_6 Depth=1 testq %rdi, %rdi movq 2704(%rsp), %rbx je LBB8_44 ## BB#43: ## %pl_dolane.i14762 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, (%rbx,%rbp) LBB8_44: ## %pl_loopend.i14765 ## in Loop: Header=BB8_6 Depth=1 testq %r10, %r10 je LBB8_46 ## BB#45: ## %pl_dolane.1.i14767 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 1(%rbx,%rbp) LBB8_46: ## %pl_loopend.1.i14770 ## in Loop: Header=BB8_6 Depth=1 testq %r11, %r11 movl -108(%rsp), %edi ## 4-byte Reload je LBB8_48 ## BB#47: ## %pl_dolane.2.i14772 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 2(%rbx,%rbp) LBB8_48: ## %pl_loopend.2.i14775 ## in Loop: Header=BB8_6 Depth=1 testq %r14, %r14 je LBB8_50 ## BB#49: ## %pl_dolane.3.i14777 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 3(%rbx,%rbp) LBB8_50: ## %pl_loopend.3.i14780 ## in Loop: Header=BB8_6 Depth=1 testq %r12, %r12 je LBB8_52 ## BB#51: ## %pl_dolane.4.i14782 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 4(%rbx,%rbp) LBB8_52: ## %pl_loopend.4.i14785 ## in Loop: Header=BB8_6 Depth=1 testq %r13, %r13 je LBB8_54 ## BB#53: ## %pl_dolane.5.i14787 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 5(%rbx,%rbp) LBB8_54: ## %pl_loopend.5.i14790 ## in Loop: Header=BB8_6 Depth=1 testq %r15, %r15 je LBB8_56 ## BB#55: ## %pl_dolane.6.i14792 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 6(%rbx,%rbp) LBB8_56: ## %pl_loopend.6.i14794 ## in Loop: 
Header=BB8_6 Depth=1 testb %sil, %sil movq 2688(%rsp), %r15 jns LBB8_58 ## BB#57: ## %pl_dolane.7.i14796 ## in Loop: Header=BB8_6 Depth=1 movb %r8b, 7(%rbx,%rbp) .p2align 4, 0x90 LBB8_58: ## %foreach_reset ## in Loop: Header=BB8_6 Depth=1 incl %edx addl -112(%rsp), %ecx ## 4-byte Folded Reload cmpl -100(%rsp), %edx ## 4-byte Folded Reload jne LBB8_6 LBB8_71: ## %if_exit addq $2600, %rsp ## imm = 0xA28 popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .globl _RenderStatic .p2align 4, 0x90 _RenderStatic: ## @RenderStatic ## BB#0: ## %all_on pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $40, %rsp movq %r9, 32(%rsp) ## 8-byte Spill movq %r8, 24(%rsp) ## 8-byte Spill movq %rcx, 16(%rsp) ## 8-byte Spill movl %edx, %r13d movq %rsi, %rbx movq %rdi, %rbp movq $0, 8(%rsp) movl 72(%rbp), %eax movl 76(%rbp), %ecx leal 15(%rax), %edx sarl $31, %edx shrl $28, %edx leal 15(%rax,%rdx), %r12d sarl $4, %r12d leal 15(%rcx), %eax sarl $31, %eax shrl $28, %eax leal 15(%rcx,%rax), %r14d sarl $4, %r14d movl %r14d, %r15d imull %r12d, %r15d leaq 8(%rsp), %rdi movl $96, %esi movl $32, %edx callq _ISPCAlloc movl %r12d, (%rax) movl %r14d, 4(%rax) movq %rbp, 8(%rax) movq %rbx, 16(%rax) movl %r13d, 24(%rax) movq 16(%rsp), %rcx ## 8-byte Reload movq %rcx, 32(%rax) movq 24(%rsp), %rcx ## 8-byte Reload movq %rcx, 40(%rax) movq 32(%rsp), %rcx ## 8-byte Reload movq %rcx, 48(%rax) vpcmpeqd %xmm0, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovaps %ymm0, 64(%rax) leaq _RenderTile___uniuniREFs_5B_unInputHeader_5D_REFs_5B_unInputDataArrays_5D_uniun_3C_unT_3E_un_3C_unT_3E_un_3C_unT_3E_(%rip), %rsi movl $1, %r8d movl $1, %r9d leaq 8(%rsp), %rdi movq %rax, %rdx movl %r15d, %ecx vzeroupper callq _ISPCLaunch movq 8(%rsp), %rdi testq %rdi, %rdi je LBB9_2 ## BB#1: ## %call_sync callq _ISPCSync movq $0, 8(%rsp) LBB9_2: ## %post_sync addq $40, %rsp popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp retq .globl _ComputeZBoundsRow .p2align 4, 0x90 _ComputeZBoundsRow: ## @ComputeZBoundsRow ## BB#0: ## %allocas pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $72, %rsp movq %r9, %r12 movl %ecx, %ebp movl %esi, %ebx testl %ebp, %ebp jle LBB10_3 ## BB#1: ## %for_loop.lr.ph movq 144(%rsp), %r14 movq 136(%rsp), %r15 imull %edx, %edi movl %edi, (%rsp) ## 4-byte Spill addl %edi, %edx movl %edx, 4(%rsp) ## 4-byte Spill xorl %edi, %edi vpcmpeqd %xmm4, %xmm4, %xmm4 vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmovups %ymm4, 32(%rsp) ## 32-byte Spill vmovss %xmm3, 20(%rsp) ## 4-byte Spill vmovss %xmm2, 16(%rsp) ## 4-byte Spill vmovss %xmm1, 12(%rsp) ## 4-byte Spill vmovss %xmm0, 8(%rsp) ## 4-byte Spill .p2align 4, 0x90 LBB10_2: ## %for_loop ## =>This Inner Loop Header: Depth=1 leal (%rbx,%rdi), %r13d movl %r13d, %esi movl (%rsp), %edx ## 4-byte Reload movl 4(%rsp), %ecx ## 4-byte Reload movq %r12, %r8 movl 128(%rsp), %r9d vmovss 12(%rsp), %xmm1 ## 4-byte Reload ## xmm1 = mem[0],zero,zero,zero vmovss 16(%rsp), %xmm2 ## 4-byte Reload ## xmm2 = mem[0],zero,zero,zero vmovss 20(%rsp), %xmm3 ## 4-byte Reload ## xmm3 = mem[0],zero,zero,zero vmovups 32(%rsp), %ymm4 ## 32-byte Reload leaq 24(%rsp), %rax pushq %rax leaq 36(%rsp), %rax pushq %rax callq _ComputeZBounds___uniuniuniuniun_3C_unf_3E_uniunfunfunfunfREFunfREFunf vmovss 24(%rsp), %xmm0 ## 4-byte Reload ## xmm0 = mem[0],zero,zero,zero addq $16, %rsp movl 28(%rsp), %eax movl %eax, (%r15) movl 24(%rsp), %eax movl %eax, (%r14) addq $4, %r15 addq $4, %r14 movl %r13d, %edi decl %ebp jne LBB10_2 LBB10_3: ## %for_exit addq $72, %rsp 
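## ----------------------------------------------------------------
## NOTE (editorial annotation): _RenderStatic above rounds the
## framebuffer up to whole tiles -- the lea/sar/shr chains are a
## signed (w + 15) / 16 and (h + 15) / 16, suggesting 16x16 tiles --
## then allocates a 96-byte task-argument block via ISPCAlloc,
## fills it, and launches one RenderTile task per tile through
## ISPCLaunch before waiting in ISPCSync. Roughly (sketch):
##   int tilesX = (width + 15) / 16, tilesY = (height + 15) / 16;
##   launch tilesX * tilesY RenderTile tasks; sync;
## _ComputeZBoundsRow is a plain scalar loop that calls
## _ComputeZBounds once per tile in the row, passing the two REF
## result slots (min/max Z) on the stack and copying each pair out
## to the caller's arrays.
## ----------------------------------------------------------------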
popq %rbx popq %r12 popq %r13 popq %r14 popq %r15 popq %rbp vzeroupper retq .section __TEXT,__literal4,4byte_literals .p2align 2 LCPI11_0: .long 1056964608 ## float 0.5 LCPI11_1: .long 1077936128 ## float 3 .section __TEXT,__const .p2align 5 LCPI11_2: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 LCPI11_3: .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .long 2147483647 ## 0x7fffffff .section __TEXT,__literal16,16byte_literals .p2align 4 LCPI11_4: .long 0 ## 0x0 .long 1 ## 0x1 .long 2 ## 0x2 .long 3 ## 0x3 LCPI11_5: .long 4 ## 0x4 .long 5 ## 0x5 .long 6 ## 0x6 .long 7 ## 0x7 .section __TEXT,__text,regular,pure_instructions .globl _SplitTileMinMax .p2align 4, 0x90 _SplitTileMinMax: ## @SplitTileMinMax ## BB#0: ## %allocas pushq %rbp pushq %r15 pushq %r14 pushq %r13 pushq %r12 pushq %rbx subq $200, %rsp movq %rcx, %r13 movq %rdx, %r12 movl 312(%rsp), %eax movq 304(%rsp), %r15 movl 264(%rsp), %ebp vcvtsi2ssl %r8d, %xmm0, %xmm2 vmovss LCPI11_0(%rip), %xmm3 ## xmm3 = mem[0],zero,zero,zero vmulss %xmm3, %xmm2, %xmm2 vcvtsi2ssl %r9d, %xmm0, %xmm4 vmulss %xmm3, %xmm4, %xmm4 vmulss %xmm0, %xmm2, %xmm0 vxorps %xmm5, %xmm5, %xmm5 vsubss %xmm0, %xmm5, %xmm0 vmulss %xmm1, %xmm4, %xmm1 vcvtsi2ssl %edi, %xmm0, %xmm5 vsubss %xmm2, %xmm5, %xmm2 vcvtsi2ssl %esi, %xmm0, %xmm5 vsubss %xmm4, %xmm5, %xmm4 vmulss %xmm0, %xmm0, %xmm5 vmulss %xmm2, %xmm2, %xmm6 vaddss %xmm5, %xmm6, %xmm5 vrsqrtss %xmm5, %xmm0, %xmm6 vmulss %xmm6, %xmm5, %xmm5 vmulss %xmm5, %xmm6, %xmm5 vmovss LCPI11_1(%rip), %xmm8 ## xmm8 = mem[0],zero,zero,zero vsubss %xmm5, %xmm8, %xmm5 vmulss %xmm5, %xmm6, %xmm5 vmulss %xmm3, %xmm5, %xmm5 vmulss %xmm1, %xmm1, %xmm6 vmulss %xmm4, %xmm4, %xmm7 vaddss %xmm7, %xmm6, %xmm6 vrsqrtss %xmm6, %xmm0, %xmm7 vmulss %xmm7, %xmm6, %xmm6 vmulss %xmm6, %xmm7, %xmm6 vsubss %xmm6, %xmm8, %xmm6 vmulss %xmm6, %xmm7, %xmm6 vmulss %xmm3, %xmm6, %xmm3 vmulss %xmm5, %xmm0, %xmm0 vmovaps %xmm0, 16(%rsp) ## 16-byte Spill vmulss %xmm3, %xmm1, %xmm0 vmovaps %xmm0, 48(%rsp) ## 16-byte Spill vmulss %xmm5, %xmm2, %xmm0 vmovaps %xmm0, (%rsp) ## 16-byte Spill vmulss %xmm3, %xmm4, %xmm0 vmovaps %xmm0, 32(%rsp) ## 16-byte Spill leal (%rax,%rax), %edi leal (%rax,%rax,2), %esi movl %ebp, %ecx sarl $31, %ecx shrl $29, %ecx addl %ebp, %ecx andl $-8, %ecx xorl %ebp, %ebp movq %rbp, -80(%rsp) ## 8-byte Spill movl %esi, -68(%rsp) ## 4-byte Spill movq %rsi, -104(%rsp) ## 8-byte Spill movl %ecx, 64(%rsp) ## 4-byte Spill testl %ecx, %ecx movl %edi, -72(%rsp) ## 4-byte Spill movl %edi, %esi movq %rsi, -88(%rsp) ## 8-byte Spill movq %rax, -96(%rsp) ## 8-byte Spill movl $0, %r8d jle LBB11_15 ## BB#1: ## %foreach_full_body.lr.ph vbroadcastss (%r12), %ymm0 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, -64(%rsp) ## 32-byte Spill vpermilps $0, 16(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 160(%rsp) ## 32-byte Spill vpermilps $0, 32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 128(%rsp) ## 32-byte Spill vpermilps $0, 48(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmovups %ymm0, 96(%rsp) ## 32-byte 
Spill xorl %r14d, %r14d vxorps %ymm7, %ymm7, %ymm7 movl -68(%rsp), %eax ## 4-byte Reload movq %rax, -104(%rsp) ## 8-byte Spill movl -72(%rsp), %eax ## 4-byte Reload movq %rax, -88(%rsp) ## 8-byte Spill movl 312(%rsp), %eax movq %rax, -96(%rsp) ## 8-byte Spill xorl %eax, %eax movq %rax, -80(%rsp) ## 8-byte Spill xorl %r8d, %r8d .p2align 4, 0x90 LBB11_2: ## %foreach_full_body ## =>This Inner Loop Header: Depth=1 movslq %r14d, %rax movq 256(%rsp), %rcx vmovdqu (%rcx,%rax), %xmm12 vmovdqu 16(%rcx,%rax), %xmm8 vpslld $2, %xmm8, %xmm0 vpslld $2, %xmm12, %xmm1 vmovq %xmm1, %rax movslq %eax, %r11 vpextrq $1, %xmm1, %rsi movslq %esi, %rbx sarq $32, %rsi sarq $32, %rax vmovq %xmm0, %rbp movslq %ebp, %r9 vpextrq $1, %xmm0, %rdi movq %r13, %rdx movslq %edi, %r13 sarq $32, %rdi sarq $32, %rbp movq 272(%rsp), %rcx vmovss (%rcx,%r9), %xmm0 ## xmm0 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rbp), %xmm0, %xmm0 ## xmm0 = xmm0[0],mem[0],xmm0[2,3] vinsertps $32, (%rcx,%r13), %xmm0, %xmm0 ## xmm0 = xmm0[0,1],mem[0],xmm0[3] vinsertps $48, (%rcx,%rdi), %xmm0, %xmm10 ## xmm10 = xmm0[0,1,2],mem[0] vmovss (%rcx,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rcx,%rbx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rcx,%rsi), %xmm1, %xmm13 ## xmm13 = xmm1[0,1,2],mem[0] movq 280(%rsp), %r10 vmovss (%r10,%r9), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rbp), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r10,%r13), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r10,%rdi), %xmm1, %xmm11 ## xmm11 = xmm1[0,1,2],mem[0] vmovss (%r10,%r11), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%r10,%rax), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%r10,%rbx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%r10,%rsi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] movq 288(%rsp), %rcx vmovss (%rcx,%r9), %xmm3 ## xmm3 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rbp), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertps $32, (%rcx,%r13), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] vinsertps $48, (%rcx,%rdi), %xmm3, %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] vmovss (%rcx,%r11), %xmm4 ## xmm4 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rax), %xmm4, %xmm4 ## xmm4 = xmm4[0],mem[0],xmm4[2,3] vinsertps $32, (%rcx,%rbx), %xmm4, %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] vinsertps $48, (%rcx,%rsi), %xmm4, %xmm4 ## xmm4 = xmm4[0,1,2],mem[0] vinsertf128 $1, %xmm3, %ymm4, %ymm6 movq 296(%rsp), %rcx vmovss (%rcx,%r9), %xmm3 ## xmm3 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rbp), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertps $32, (%rcx,%r13), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] movq %rdx, %r13 vinsertps $48, (%rcx,%rdi), %xmm3, %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] vmovss (%rcx,%r11), %xmm4 ## xmm4 = mem[0],zero,zero,zero vinsertps $16, (%rcx,%rax), %xmm4, %xmm4 ## xmm4 = xmm4[0],mem[0],xmm4[2,3] vinsertps $32, (%rcx,%rbx), %xmm4, %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] vinsertps $48, (%rcx,%rsi), %xmm4, %xmm4 ## xmm4 = xmm4[0,1,2],mem[0] vinsertf128 $1, %xmm3, %ymm4, %ymm9 vsubps %ymm9, %ymm7, %ymm5 vsubps -32(%rsp), %ymm6, %ymm7 ## 32-byte Folded Reload vcmpnltps %ymm5, %ymm7, %ymm3 vmovaps LCPI11_2(%rip), %ymm4 ## ymm4 = [1,1,1,1,1,1,1,1] vxorps %ymm4, %ymm3, %ymm14 vpslld $31, %xmm14, %xmm2 vpsrad $31, %xmm2, %xmm2 vextractf128 $1, %ymm14, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, 
%xmm0, %ymm2, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB11_4 ## BB#3: ## %eval_1 ## in Loop: Header=BB11_2 Depth=1 vcmpnltps %ymm5, %ymm7, %ymm0 vbroadcastss (%r13), %ymm2 vsubps %ymm6, %ymm2, %ymm2 vcmpnltps %ymm5, %ymm2, %ymm2 vandps %ymm2, %ymm0, %ymm3 LBB11_4: ## %logical_op_done ## in Loop: Header=BB11_2 Depth=1 vbroadcastss 4(%r12), %ymm0 vsubps %ymm0, %ymm6, %ymm7 vcmpnltps %ymm5, %ymm7, %ymm15 vxorps %ymm4, %ymm15, %ymm0 vpslld $31, %xmm0, %xmm2 vpsrad $31, %xmm2, %xmm2 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm2, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB11_6 ## BB#5: ## %eval_1128 ## in Loop: Header=BB11_2 Depth=1 vcmpnltps %ymm5, %ymm7, %ymm0 vbroadcastss 4(%r13), %ymm2 vsubps %ymm6, %ymm2, %ymm2 vcmpnltps %ymm5, %ymm2, %ymm2 vandps %ymm2, %ymm0, %ymm15 LBB11_6: ## %logical_op_done129 ## in Loop: Header=BB11_2 Depth=1 vbroadcastss 8(%r12), %ymm0 vsubps %ymm0, %ymm6, %ymm7 vcmpnltps %ymm5, %ymm7, %ymm14 vxorps %ymm4, %ymm14, %ymm0 vpslld $31, %xmm0, %xmm2 vpsrad $31, %xmm2, %xmm2 vextractf128 $1, %ymm0, %xmm0 vpslld $31, %xmm0, %xmm0 vpsrad $31, %xmm0, %xmm0 vinsertf128 $1, %xmm0, %ymm2, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax je LBB11_8 ## BB#7: ## %eval_1158 ## in Loop: Header=BB11_2 Depth=1 vcmpnltps %ymm5, %ymm7, %ymm0 vbroadcastss 8(%r13), %ymm2 vsubps %ymm6, %ymm2, %ymm2 vcmpnltps %ymm5, %ymm2, %ymm2 vandps %ymm2, %ymm0, %ymm14 LBB11_8: ## %logical_op_done159 ## in Loop: Header=BB11_2 Depth=1 vinsertf128 $1, %xmm10, %ymm13, %ymm7 vinsertf128 $1, %xmm11, %ymm1, %ymm0 vbroadcastss 12(%r12), %ymm1 vsubps %ymm1, %ymm6, %ymm1 vcmpnltps %ymm5, %ymm1, %ymm13 vxorps %ymm4, %ymm13, %ymm2 vpslld $31, %xmm2, %xmm4 vpsrad $31, %xmm4, %xmm4 vextractf128 $1, %ymm2, %xmm2 vpslld $31, %xmm2, %xmm2 vpsrad $31, %xmm2, %xmm2 vinsertf128 $1, %xmm2, %ymm4, %ymm2 vmovmskps %ymm2, %eax cmpl $255, %eax je LBB11_10 ## BB#9: ## %eval_1188 ## in Loop: Header=BB11_2 Depth=1 vcmpnltps %ymm5, %ymm1, %ymm1 vbroadcastss 12(%r13), %ymm2 vsubps %ymm6, %ymm2, %ymm2 vcmpnltps %ymm5, %ymm2, %ymm2 vandps %ymm2, %ymm1, %ymm13 LBB11_10: ## %logical_op_done189 ## in Loop: Header=BB11_2 Depth=1 vmulps -64(%rsp), %ymm6, %ymm1 ## 32-byte Folded Reload vmulps 160(%rsp), %ymm7, %ymm2 ## 32-byte Folded Reload vaddps %ymm1, %ymm2, %ymm1 vmulps 128(%rsp), %ymm6, %ymm2 ## 32-byte Folded Reload vmulps 96(%rsp), %ymm0, %ymm0 ## 32-byte Folded Reload vaddps %ymm2, %ymm0, %ymm0 vmovaps LCPI11_3(%rip), %ymm4 ## ymm4 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647] vandps %ymm4, %ymm1, %ymm2 vcmpnleps %ymm9, %ymm2, %ymm5 vmovmskps %ymm5, %eax vxorps %ymm7, %ymm7, %ymm7 cmpl $255, %eax je LBB11_18 ## BB#11: ## %logical_op_done189 ## in Loop: Header=BB11_2 Depth=1 testl %eax, %eax je LBB11_12 ## BB#19: ## %cif_test_mixed ## in Loop: Header=BB11_2 Depth=1 vcmpnleps %ymm7, %ymm1, %ymm1 vblendvps %ymm1, %ymm3, %ymm7, %ymm2 vblendvps %ymm5, %ymm2, %ymm3, %ymm3 vblendvps %ymm1, %ymm7, %ymm15, %ymm2 vblendvps %ymm5, %ymm2, %ymm15, %ymm15 vblendvps %ymm1, %ymm14, %ymm7, %ymm2 vblendvps %ymm5, %ymm2, %ymm14, %ymm14 vblendvps %ymm1, %ymm7, %ymm13, %ymm1 vblendvps %ymm5, %ymm1, %ymm13, %ymm13 jmp LBB11_12 .p2align 4, 0x90 LBB11_18: ## %cif_test_all ## in Loop: Header=BB11_2 Depth=1 vcmpnleps %ymm7, %ymm1, %ymm2 vandps %ymm3, %ymm2, %ymm3 vcmpleps %ymm7, %ymm1, %ymm1 vandps %ymm15, %ymm1, %ymm15 vandps %ymm14, %ymm2, %ymm14 vandps %ymm13, %ymm1, %ymm13 LBB11_12: ## %cif_done ## in Loop: Header=BB11_2 Depth=1 vandps 
%ymm4, %ymm0, %ymm1 vcmpnleps %ymm9, %ymm1, %ymm1 vmovmskps %ymm1, %eax testl %eax, %eax je LBB11_21 ## BB#13: ## %cif_done ## in Loop: Header=BB11_2 Depth=1 cmpl $255, %eax jne LBB11_20 ## BB#14: ## %cif_test_all378 ## in Loop: Header=BB11_2 Depth=1 vcmpnleps %ymm7, %ymm0, %ymm1 vandps %ymm3, %ymm1, %ymm3 vandps %ymm15, %ymm1, %ymm15 vcmpleps %ymm7, %ymm0, %ymm0 vandps %ymm14, %ymm0, %ymm14 vandps %ymm13, %ymm0, %ymm13 jmp LBB11_21 .p2align 4, 0x90 LBB11_20: ## %cif_test_mixed416 ## in Loop: Header=BB11_2 Depth=1 vcmpnleps %ymm7, %ymm0, %ymm0 vblendvps %ymm0, %ymm3, %ymm7, %ymm2 vblendvps %ymm1, %ymm2, %ymm3, %ymm3 vblendvps %ymm0, %ymm15, %ymm7, %ymm2 vblendvps %ymm1, %ymm2, %ymm15, %ymm15 vblendvps %ymm0, %ymm7, %ymm14, %ymm2 vblendvps %ymm1, %ymm2, %ymm14, %ymm14 vblendvps %ymm0, %ymm7, %ymm13, %ymm0 vblendvps %ymm1, %ymm0, %ymm13, %ymm13 LBB11_21: ## %cif_done371 ## in Loop: Header=BB11_2 Depth=1 vinsertf128 $1, %xmm8, %ymm12, %ymm12 vmovmskps %ymm3, %edi testl %edi, %edi je LBB11_24 ## BB#22: ## %cif_done371 ## in Loop: Header=BB11_2 Depth=1 cmpl $255, %edi jne LBB11_34 ## BB#23: ## %packed_store_active___un_3C_uni_3E_vyi.exit6452 ## in Loop: Header=BB11_2 Depth=1 movq -80(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r15,%rax,4) vmovups %xmm12, (%r15,%rax,4) addl $8, %eax movq %rax, -80(%rsp) ## 8-byte Spill jmp LBB11_24 .p2align 4, 0x90 LBB11_34: ## %cif_test_mixed556 ## in Loop: Header=BB11_2 Depth=1 movq -80(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rsi xorl %r11d, %r11d testb $1, %dil je LBB11_36 ## BB#35: ## %store.i.i6498 ## in Loop: Header=BB11_2 Depth=1 vmovd %xmm12, (%rsi) movl $1, %r11d LBB11_36: ## %loopend.i.i6503 ## in Loop: Header=BB11_2 Depth=1 testb $2, %dil je LBB11_38 ## BB#37: ## %store.i.i6498.1 ## in Loop: Header=BB11_2 Depth=1 vpextrd $1, %xmm12, (%rsi,%r11,4) incl %r11d LBB11_38: ## %loopend.i.i6503.1 ## in Loop: Header=BB11_2 Depth=1 testb $4, %dil je LBB11_40 ## BB#39: ## %store.i.i6498.2 ## in Loop: Header=BB11_2 Depth=1 movslq %r11d, %r11 vpextrd $2, %xmm12, (%rsi,%r11,4) incl %r11d LBB11_40: ## %loopend.i.i6503.2 ## in Loop: Header=BB11_2 Depth=1 testb $8, %dil je LBB11_42 ## BB#41: ## %store.i.i6498.3 ## in Loop: Header=BB11_2 Depth=1 movslq %r11d, %r11 vpextrd $3, %xmm12, (%rsi,%r11,4) incl %r11d LBB11_42: ## %loopend.i.i6503.3 ## in Loop: Header=BB11_2 Depth=1 testb $16, %dil je LBB11_44 ## BB#43: ## %store.i.i6498.4 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r11d, %r11 vmovd %xmm0, (%rsi,%r11,4) incl %r11d LBB11_44: ## %loopend.i.i6503.4 ## in Loop: Header=BB11_2 Depth=1 testb $32, %dil je LBB11_46 ## BB#45: ## %store.i.i6498.5 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r11d, %r11 vpextrd $1, %xmm0, (%rsi,%r11,4) incl %r11d LBB11_46: ## %loopend.i.i6503.5 ## in Loop: Header=BB11_2 Depth=1 testb $64, %dil je LBB11_48 ## BB#47: ## %store.i.i6498.6 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r11d, %r11 vpextrd $2, %xmm0, (%rsi,%r11,4) incl %r11d LBB11_48: ## %loopend.i.i6503.6 ## in Loop: Header=BB11_2 Depth=1 testb %dil, %dil jns LBB11_50 ## BB#49: ## %store.i.i6498.7 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %r11d, %r11 vpextrd $3, %xmm0, (%rsi,%r11,4) incl %r11d LBB11_50: ## %loopend.i.i6503.7 ## in Loop: Header=BB11_2 Depth=1 movq -80(%rsp), %rax ## 8-byte Reload addl %eax, %r11d movq %r11, -80(%rsp) ## 8-byte Spill LBB11_24: ## %cif_done526 ## in Loop: Header=BB11_2 Depth=1 vmovmskps %ymm15, %esi testl 
%esi, %esi je LBB11_27 ## BB#25: ## %cif_done526 ## in Loop: Header=BB11_2 Depth=1 cmpl $255, %esi jne LBB11_51 ## BB#26: ## %packed_store_active___un_3C_uni_3E_vyi.exit6559 ## in Loop: Header=BB11_2 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r15,%rax,4) vmovups %xmm12, (%r15,%rax,4) addl $8, %eax movq %rax, -96(%rsp) ## 8-byte Spill jmp LBB11_27 .p2align 4, 0x90 LBB11_51: ## %cif_test_mixed665 ## in Loop: Header=BB11_2 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rax xorl %edi, %edi testb $1, %sil je LBB11_53 ## BB#52: ## %store.i.i6611 ## in Loop: Header=BB11_2 Depth=1 vmovd %xmm12, (%rax) movl $1, %edi LBB11_53: ## %loopend.i.i6616 ## in Loop: Header=BB11_2 Depth=1 testb $2, %sil je LBB11_55 ## BB#54: ## %store.i.i6611.1 ## in Loop: Header=BB11_2 Depth=1 vpextrd $1, %xmm12, (%rax,%rdi,4) incl %edi LBB11_55: ## %loopend.i.i6616.1 ## in Loop: Header=BB11_2 Depth=1 testb $4, %sil je LBB11_57 ## BB#56: ## %store.i.i6611.2 ## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $2, %xmm12, (%rax,%rdi,4) incl %edi LBB11_57: ## %loopend.i.i6616.2 ## in Loop: Header=BB11_2 Depth=1 testb $8, %sil je LBB11_59 ## BB#58: ## %store.i.i6611.3 ## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $3, %xmm12, (%rax,%rdi,4) incl %edi LBB11_59: ## %loopend.i.i6616.3 ## in Loop: Header=BB11_2 Depth=1 testb $16, %sil je LBB11_61 ## BB#60: ## %store.i.i6611.4 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vmovd %xmm0, (%rax,%rdi,4) incl %edi LBB11_61: ## %loopend.i.i6616.4 ## in Loop: Header=BB11_2 Depth=1 testb $32, %sil je LBB11_63 ## BB#62: ## %store.i.i6611.5 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $1, %xmm0, (%rax,%rdi,4) incl %edi LBB11_63: ## %loopend.i.i6616.5 ## in Loop: Header=BB11_2 Depth=1 testb $64, %sil je LBB11_65 ## BB#64: ## %store.i.i6611.6 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $2, %xmm0, (%rax,%rdi,4) incl %edi LBB11_65: ## %loopend.i.i6616.6 ## in Loop: Header=BB11_2 Depth=1 testb %sil, %sil jns LBB11_67 ## BB#66: ## %store.i.i6611.7 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $3, %xmm0, (%rax,%rdi,4) incl %edi LBB11_67: ## %loopend.i.i6616.7 ## in Loop: Header=BB11_2 Depth=1 movq -96(%rsp), %rax ## 8-byte Reload addl %eax, %edi movq %rdi, -96(%rsp) ## 8-byte Spill LBB11_27: ## %cif_done634 ## in Loop: Header=BB11_2 Depth=1 vmovmskps %ymm14, %esi testl %esi, %esi je LBB11_30 ## BB#28: ## %cif_done634 ## in Loop: Header=BB11_2 Depth=1 cmpl $255, %esi jne LBB11_68 ## BB#29: ## %packed_store_active___un_3C_uni_3E_vyi.exit6670 ## in Loop: Header=BB11_2 Depth=1 movq -88(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r15,%rax,4) vmovups %xmm12, (%r15,%rax,4) addl $8, %eax movq %rax, -88(%rsp) ## 8-byte Spill jmp LBB11_30 .p2align 4, 0x90 LBB11_68: ## %cif_test_mixed774 ## in Loop: Header=BB11_2 Depth=1 movq -88(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rax xorl %edi, %edi testb $1, %sil je LBB11_70 ## BB#69: ## %store.i.i6718 ## in Loop: Header=BB11_2 Depth=1 vmovd %xmm12, (%rax) movl $1, %edi LBB11_70: ## %loopend.i.i6723 ## in Loop: Header=BB11_2 Depth=1 testb $2, %sil je LBB11_72 ## BB#71: ## %store.i.i6718.1 ## in Loop: Header=BB11_2 Depth=1 vpextrd $1, %xmm12, (%rax,%rdi,4) incl %edi LBB11_72: ## %loopend.i.i6723.1 ## in Loop: Header=BB11_2 Depth=1 testb $4, %sil je LBB11_74 ## BB#73: ## %store.i.i6718.2 
## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $2, %xmm12, (%rax,%rdi,4) incl %edi LBB11_74: ## %loopend.i.i6723.2 ## in Loop: Header=BB11_2 Depth=1 testb $8, %sil je LBB11_76 ## BB#75: ## %store.i.i6718.3 ## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $3, %xmm12, (%rax,%rdi,4) incl %edi LBB11_76: ## %loopend.i.i6723.3 ## in Loop: Header=BB11_2 Depth=1 testb $16, %sil je LBB11_78 ## BB#77: ## %store.i.i6718.4 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vmovd %xmm0, (%rax,%rdi,4) incl %edi LBB11_78: ## %loopend.i.i6723.4 ## in Loop: Header=BB11_2 Depth=1 testb $32, %sil je LBB11_80 ## BB#79: ## %store.i.i6718.5 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $1, %xmm0, (%rax,%rdi,4) incl %edi LBB11_80: ## %loopend.i.i6723.5 ## in Loop: Header=BB11_2 Depth=1 testb $64, %sil je LBB11_82 ## BB#81: ## %store.i.i6718.6 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $2, %xmm0, (%rax,%rdi,4) incl %edi LBB11_82: ## %loopend.i.i6723.6 ## in Loop: Header=BB11_2 Depth=1 testb %sil, %sil jns LBB11_84 ## BB#83: ## %store.i.i6718.7 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $3, %xmm0, (%rax,%rdi,4) incl %edi LBB11_84: ## %loopend.i.i6723.7 ## in Loop: Header=BB11_2 Depth=1 movq -88(%rsp), %rax ## 8-byte Reload addl %eax, %edi movq %rdi, -88(%rsp) ## 8-byte Spill LBB11_30: ## %cif_done743 ## in Loop: Header=BB11_2 Depth=1 vmovmskps %ymm13, %esi testl %esi, %esi je LBB11_33 ## BB#31: ## %cif_done743 ## in Loop: Header=BB11_2 Depth=1 cmpl $255, %esi jne LBB11_85 ## BB#32: ## %packed_store_active___un_3C_uni_3E_vyi.exit6749 ## in Loop: Header=BB11_2 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm12, 16(%r15,%rax,4) vmovups %xmm12, (%r15,%rax,4) addl $8, %eax movq %rax, -104(%rsp) ## 8-byte Spill jmp LBB11_33 .p2align 4, 0x90 LBB11_85: ## %cif_test_mixed883 ## in Loop: Header=BB11_2 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rax xorl %edi, %edi testb $1, %sil je LBB11_87 ## BB#86: ## %store.i.i6801 ## in Loop: Header=BB11_2 Depth=1 vmovd %xmm12, (%rax) movl $1, %edi LBB11_87: ## %loopend.i.i6806 ## in Loop: Header=BB11_2 Depth=1 testb $2, %sil je LBB11_89 ## BB#88: ## %store.i.i6801.1 ## in Loop: Header=BB11_2 Depth=1 vpextrd $1, %xmm12, (%rax,%rdi,4) incl %edi LBB11_89: ## %loopend.i.i6806.1 ## in Loop: Header=BB11_2 Depth=1 testb $4, %sil je LBB11_91 ## BB#90: ## %store.i.i6801.2 ## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $2, %xmm12, (%rax,%rdi,4) incl %edi LBB11_91: ## %loopend.i.i6806.2 ## in Loop: Header=BB11_2 Depth=1 testb $8, %sil je LBB11_93 ## BB#92: ## %store.i.i6801.3 ## in Loop: Header=BB11_2 Depth=1 movslq %edi, %rdi vpextrd $3, %xmm12, (%rax,%rdi,4) incl %edi LBB11_93: ## %loopend.i.i6806.3 ## in Loop: Header=BB11_2 Depth=1 testb $16, %sil je LBB11_95 ## BB#94: ## %store.i.i6801.4 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vmovd %xmm0, (%rax,%rdi,4) incl %edi LBB11_95: ## %loopend.i.i6806.4 ## in Loop: Header=BB11_2 Depth=1 testb $32, %sil je LBB11_97 ## BB#96: ## %store.i.i6801.5 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $1, %xmm0, (%rax,%rdi,4) incl %edi LBB11_97: ## %loopend.i.i6806.5 ## in Loop: Header=BB11_2 Depth=1 testb $64, %sil je LBB11_99 ## BB#98: ## %store.i.i6801.6 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, 
%xmm0 movslq %edi, %rdi vpextrd $2, %xmm0, (%rax,%rdi,4) incl %edi LBB11_99: ## %loopend.i.i6806.6 ## in Loop: Header=BB11_2 Depth=1 testb %sil, %sil jns LBB11_101 ## BB#100: ## %store.i.i6801.7 ## in Loop: Header=BB11_2 Depth=1 vextractf128 $1, %ymm12, %xmm0 movslq %edi, %rdi vpextrd $3, %xmm0, (%rax,%rdi,4) incl %edi LBB11_101: ## %loopend.i.i6806.7 ## in Loop: Header=BB11_2 Depth=1 movq -104(%rsp), %rax ## 8-byte Reload addl %eax, %edi movq %rdi, -104(%rsp) ## 8-byte Spill LBB11_33: ## %cif_done852 ## in Loop: Header=BB11_2 Depth=1 addl $8, %r8d addl $32, %r14d cmpl 64(%rsp), %r8d ## 4-byte Folded Reload jl LBB11_2 LBB11_15: ## %partial_inner_all_outer movq 320(%rsp), %r10 movl 264(%rsp), %eax cmpl %eax, %r8d jge LBB11_216 ## BB#16: ## %partial_inner_only vmovd %r8d, %xmm0 vpshufd $0, %xmm0, %xmm0 ## xmm0 = xmm0[0,0,0,0] vpaddd LCPI11_4(%rip), %xmm0, %xmm2 vpaddd LCPI11_5(%rip), %xmm0, %xmm10 vmovd %eax, %xmm1 vpshufd $0, %xmm1, %xmm8 ## xmm8 = xmm1[0,0,0,0] vpcmpgtd %xmm10, %xmm8, %xmm6 vmovdqu %ymm2, 64(%rsp) ## 32-byte Spill vpcmpgtd %xmm2, %xmm8, %xmm5 vinsertf128 $1, %xmm6, %ymm5, %ymm2 shll $2, %r8d movslq %r8d, %rax movq 256(%rsp), %rcx vmaskmovps (%rcx,%rax), %ymm2, %ymm0 vpslld $2, %xmm0, %xmm1 vmovups %ymm0, -32(%rsp) ## 32-byte Spill vextractf128 $1, %ymm0, %xmm0 vmovaps %xmm0, -64(%rsp) ## 16-byte Spill vpslld $2, %xmm0, %xmm3 vinsertf128 $1, %xmm3, %ymm1, %ymm3 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm2, %ymm3, %ymm1, %ymm3 vpextrq $1, %xmm3, %rax movslq %eax, %r8 sarq $32, %rax vmovq %xmm3, %rsi movslq %esi, %r14 sarq $32, %rsi vextractf128 $1, %ymm3, %xmm3 vpextrq $1, %xmm3, %rdi movslq %edi, %rcx sarq $32, %rdi vmovq %xmm3, %r11 movslq %r11d, %r9 sarq $32, %r11 movq 288(%rsp), %rbp vmovss (%rbp,%r9), %xmm3 ## xmm3 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%r11), %xmm3, %xmm3 ## xmm3 = xmm3[0],mem[0],xmm3[2,3] vinsertps $32, (%rbp,%rcx), %xmm3, %xmm3 ## xmm3 = xmm3[0,1],mem[0],xmm3[3] vinsertps $48, (%rbp,%rdi), %xmm3, %xmm3 ## xmm3 = xmm3[0,1,2],mem[0] vmovss (%rbp,%r14), %xmm4 ## xmm4 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm4, %xmm4 ## xmm4 = xmm4[0],mem[0],xmm4[2,3] vinsertps $32, (%rbp,%r8), %xmm4, %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] vinsertps $48, (%rbp,%rax), %xmm4, %xmm4 ## xmm4 = xmm4[0,1,2],mem[0] vinsertf128 $1, %xmm3, %ymm4, %ymm3 movq 296(%rsp), %rbp vmovss (%rbp,%r9), %xmm4 ## xmm4 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%r11), %xmm4, %xmm4 ## xmm4 = xmm4[0],mem[0],xmm4[2,3] vinsertps $32, (%rbp,%rcx), %xmm4, %xmm4 ## xmm4 = xmm4[0,1],mem[0],xmm4[3] vinsertps $48, (%rbp,%rdi), %xmm4, %xmm4 ## xmm4 = xmm4[0,1,2],mem[0] vmovss (%rbp,%r14), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rbp,%r8), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rbp,%rax), %xmm7, %xmm7 ## xmm7 = xmm7[0,1,2],mem[0] vinsertf128 $1, %xmm4, %ymm7, %ymm12 vsubps %ymm12, %ymm1, %ymm4 vbroadcastss (%r12), %ymm1 vsubps %ymm1, %ymm3, %ymm9 vcmpltps %ymm4, %ymm9, %ymm1 vandps %ymm2, %ymm1, %ymm1 vextractf128 $1, %ymm1, %xmm7 vpcmpeqd %xmm6, %xmm7, %xmm7 movq 272(%rsp), %rbp vmovss (%rbp,%r9), %xmm6 ## xmm6 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%r11), %xmm6, %xmm6 ## xmm6 = xmm6[0],mem[0],xmm6[2,3] vinsertps $32, (%rbp,%rcx), %xmm6, %xmm6 ## xmm6 = xmm6[0,1],mem[0],xmm6[3] vinsertps $48, (%rbp,%rdi), %xmm6, %xmm6 ## xmm6 = xmm6[0,1,2],mem[0] vpcmpeqd %xmm5, %xmm1, %xmm1 vmovss (%rbp,%r14), %xmm5 ## xmm5 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), 
%xmm5, %xmm5 ## xmm5 = xmm5[0],mem[0],xmm5[2,3] vinsertps $32, (%rbp,%r8), %xmm5, %xmm5 ## xmm5 = xmm5[0,1],mem[0],xmm5[3] vinsertps $48, (%rbp,%rax), %xmm5, %xmm5 ## xmm5 = xmm5[0,1,2],mem[0] vinsertf128 $1, %xmm7, %ymm1, %ymm7 movq 280(%rsp), %rbp vmovss (%rbp,%r9), %xmm1 ## xmm1 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%r11), %xmm1, %xmm1 ## xmm1 = xmm1[0],mem[0],xmm1[2,3] vinsertps $32, (%rbp,%rcx), %xmm1, %xmm1 ## xmm1 = xmm1[0,1],mem[0],xmm1[3] vinsertps $48, (%rbp,%rdi), %xmm1, %xmm1 ## xmm1 = xmm1[0,1,2],mem[0] vmovmskps %ymm7, %edi vmovss (%rbp,%r14), %xmm7 ## xmm7 = mem[0],zero,zero,zero vinsertps $16, (%rbp,%rsi), %xmm7, %xmm7 ## xmm7 = xmm7[0],mem[0],xmm7[2,3] vinsertps $32, (%rbp,%r8), %xmm7, %xmm7 ## xmm7 = xmm7[0,1],mem[0],xmm7[3] vinsertps $48, (%rbp,%rax), %xmm7, %xmm15 ## xmm15 = xmm7[0,1,2],mem[0] cmpl $255, %edi jne LBB11_102 ## BB#17: vcmpnltps %ymm4, %ymm9, %ymm11 jmp LBB11_103 LBB11_102: ## %eval_11012 vcmpnltps %ymm4, %ymm9, %ymm7 vbroadcastss (%r13), %ymm9 vsubps %ymm3, %ymm9, %ymm9 vcmpnltps %ymm4, %ymm9, %ymm9 vandps %ymm7, %ymm9, %ymm7 vandps %ymm2, %ymm7, %ymm11 LBB11_103: ## %logical_op_done1013 vbroadcastss 4(%r12), %ymm7 vsubps %ymm7, %ymm3, %ymm9 vcmpltps %ymm4, %ymm9, %ymm7 vandps %ymm2, %ymm7, %ymm7 vextractf128 $1, %ymm7, %xmm0 vextractf128 $1, %ymm2, %xmm13 vpcmpeqd %xmm13, %xmm0, %xmm0 vpcmpeqd %xmm2, %xmm7, %xmm7 vinsertf128 $1, %xmm0, %ymm7, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax jne LBB11_105 ## BB#104: vcmpnltps %ymm4, %ymm9, %ymm14 jmp LBB11_106 LBB11_105: ## %eval_11042 vcmpnltps %ymm4, %ymm9, %ymm0 vbroadcastss 4(%r13), %ymm7 vsubps %ymm3, %ymm7, %ymm7 vcmpnltps %ymm4, %ymm7, %ymm7 vandps %ymm0, %ymm7, %ymm0 vandps %ymm2, %ymm0, %ymm14 LBB11_106: ## %logical_op_done1043 vbroadcastss 8(%r12), %ymm0 vsubps %ymm0, %ymm3, %ymm9 vcmpltps %ymm4, %ymm9, %ymm0 vandps %ymm2, %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm2, %xmm0, %xmm0 vinsertf128 $1, %xmm7, %ymm0, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax jne LBB11_108 ## BB#107: vcmpnltps %ymm4, %ymm9, %ymm9 jmp LBB11_109 LBB11_108: ## %eval_11072 vcmpnltps %ymm4, %ymm9, %ymm0 vbroadcastss 8(%r13), %ymm7 vsubps %ymm3, %ymm7, %ymm7 vcmpnltps %ymm4, %ymm7, %ymm7 vandps %ymm0, %ymm7, %ymm0 vandps %ymm2, %ymm0, %ymm9 LBB11_109: ## %logical_op_done1073 vinsertf128 $1, %xmm6, %ymm5, %ymm6 vinsertf128 $1, %xmm1, %ymm15, %ymm1 vbroadcastss 12(%r12), %ymm0 vsubps %ymm0, %ymm3, %ymm5 vcmpltps %ymm4, %ymm5, %ymm0 vandps %ymm2, %ymm0, %ymm0 vextractf128 $1, %ymm0, %xmm7 vpcmpeqd %xmm13, %xmm7, %xmm7 vpcmpeqd %xmm2, %xmm0, %xmm0 vinsertf128 $1, %xmm7, %ymm0, %ymm0 vmovmskps %ymm0, %eax cmpl $255, %eax jne LBB11_111 ## BB#110: vcmpnltps %ymm4, %ymm5, %ymm5 jmp LBB11_112 LBB11_111: ## %eval_11102 vcmpnltps %ymm4, %ymm5, %ymm0 vbroadcastss 12(%r13), %ymm5 vsubps %ymm3, %ymm5, %ymm5 vcmpnltps %ymm4, %ymm5, %ymm4 vandps %ymm0, %ymm4, %ymm0 vandps %ymm2, %ymm0, %ymm5 LBB11_112: ## %logical_op_done1103 vmovups 64(%rsp), %ymm0 ## 32-byte Reload vinsertf128 $1, %xmm10, %ymm0, %ymm10 vinsertf128 $1, %xmm8, %ymm8, %ymm8 vpermilps $0, (%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmulps %ymm3, %ymm0, %ymm0 vpermilps $0, 16(%rsp), %xmm4 ## 16-byte Folded Reload ## xmm4 = mem[0,0,0,0] vinsertf128 $1, %xmm4, %ymm4, %ymm4 vmulps %ymm6, %ymm4, %ymm4 vaddps %ymm0, %ymm4, %ymm4 vpermilps $0, 32(%rsp), %xmm0 ## 16-byte Folded Reload ## xmm0 = mem[0,0,0,0] vinsertf128 $1, %xmm0, %ymm0, %ymm0 vmulps %ymm3, %ymm0, %ymm0 vpermilps $0, 
48(%rsp), %xmm3 ## 16-byte Folded Reload ## xmm3 = mem[0,0,0,0] vinsertf128 $1, %xmm3, %ymm3, %ymm3 vmulps %ymm1, %ymm3, %ymm1 vaddps %ymm0, %ymm1, %ymm0 vandps LCPI11_3(%rip), %ymm4, %ymm1 vmovmskps %ymm2, %ecx cmpl $255, %ecx jne LBB11_116 ## BB#113: ## %cif_mask_all1149 vcmpnleps %ymm12, %ymm1, %ymm1 vmovmskps %ymm1, %eax testl %eax, %eax je LBB11_140 ## BB#114: ## %cif_mask_all1149 cmpl $255, %eax jne LBB11_139 ## BB#115: ## %cif_test_all1158 vxorps %ymm1, %ymm1, %ymm1 vcmpnleps %ymm1, %ymm4, %ymm2 vandps %ymm11, %ymm2, %ymm11 vcmpleps %ymm1, %ymm4, %ymm1 vandps %ymm14, %ymm1, %ymm14 vandps %ymm9, %ymm2, %ymm9 vandps %ymm5, %ymm1, %ymm5 jmp LBB11_140 LBB11_116: ## %cif_mask_mixed1150 vcmpnleps %ymm12, %ymm1, %ymm3 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm3, %ymm2, %ymm1, %ymm3 vmovmskps %ymm3, %eax testl %eax, %eax je LBB11_118 ## BB#117: ## %safe_if_run_true1253 vcmpnleps %ymm1, %ymm4, %ymm4 vblendvps %ymm4, %ymm11, %ymm1, %ymm6 vblendvps %ymm3, %ymm6, %ymm11, %ymm11 vblendvps %ymm4, %ymm1, %ymm14, %ymm6 vblendvps %ymm3, %ymm6, %ymm14, %ymm14 vblendvps %ymm4, %ymm9, %ymm1, %ymm6 vblendvps %ymm3, %ymm6, %ymm9, %ymm9 vblendvps %ymm4, %ymm1, %ymm5, %ymm4 vblendvps %ymm3, %ymm4, %ymm5, %ymm5 LBB11_118: ## %cif_mask_mixed1310 vmovdqu -32(%rsp), %ymm4 ## 32-byte Reload vandps LCPI11_3(%rip), %ymm0, %ymm3 vcmpnleps %ymm12, %ymm3, %ymm3 vblendvps %ymm3, %ymm2, %ymm1, %ymm1 vmovmskps %ymm1, %eax testl %eax, %eax je LBB11_120 ## BB#119: ## %safe_if_run_true1413 vxorps %ymm2, %ymm2, %ymm2 vcmpnleps %ymm2, %ymm0, %ymm0 vblendvps %ymm0, %ymm11, %ymm2, %ymm3 vblendvps %ymm1, %ymm3, %ymm11, %ymm11 vblendvps %ymm0, %ymm14, %ymm2, %ymm3 vblendvps %ymm1, %ymm3, %ymm14, %ymm14 vblendvps %ymm0, %ymm2, %ymm9, %ymm3 vblendvps %ymm1, %ymm3, %ymm9, %ymm9 vblendvps %ymm0, %ymm2, %ymm5, %ymm0 vblendvps %ymm1, %ymm0, %ymm5, %ymm5 LBB11_120: ## %cif_mask_mixed1467 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm11, %ymm1, %ymm0 vmovmskps %ymm0, %edx testl %edx, %edx je LBB11_121 ## BB#176: ## %loop.i.i6889.preheader movq -80(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rdi xorl %eax, %eax testb $1, %dl je LBB11_178 ## BB#177: ## %store.i.i6893 vmovd %xmm4, (%rdi) movl $1, %eax LBB11_178: ## %loopend.i.i6898 testb $2, %dl je LBB11_180 ## BB#179: ## %store.i.i6893.1 vpextrd $1, %xmm4, (%rdi,%rax,4) incl %eax LBB11_180: ## %loopend.i.i6898.1 vmovdqa -64(%rsp), %xmm2 ## 16-byte Reload jmp LBB11_164 LBB11_121: vmovdqa -64(%rsp), %xmm2 ## 16-byte Reload jmp LBB11_122 LBB11_139: ## %cif_test_mixed1198 vxorps %ymm2, %ymm2, %ymm2 vcmpnleps %ymm2, %ymm4, %ymm3 vblendvps %ymm3, %ymm11, %ymm2, %ymm4 vblendvps %ymm1, %ymm4, %ymm11, %ymm11 vblendvps %ymm3, %ymm2, %ymm14, %ymm4 vblendvps %ymm1, %ymm4, %ymm14, %ymm14 vblendvps %ymm3, %ymm9, %ymm2, %ymm4 vblendvps %ymm1, %ymm4, %ymm9, %ymm9 vblendvps %ymm3, %ymm2, %ymm5, %ymm2 vblendvps %ymm1, %ymm2, %ymm5, %ymm5 LBB11_140: ## %cif_mask_all1309 vandps LCPI11_3(%rip), %ymm0, %ymm1 vcmpnleps %ymm12, %ymm1, %ymm1 vmovmskps %ymm1, %eax testl %eax, %eax vmovdqu -32(%rsp), %ymm4 ## 32-byte Reload je LBB11_144 ## BB#141: ## %cif_mask_all1309 cmpl $255, %eax jne LBB11_143 ## BB#142: ## %cif_test_all1318 vxorps %ymm1, %ymm1, %ymm1 vcmpnleps %ymm1, %ymm0, %ymm2 vandps %ymm11, %ymm2, %ymm11 vandps %ymm14, %ymm2, %ymm14 vcmpleps %ymm1, %ymm0, %ymm0 vandps %ymm9, %ymm0, %ymm9 vandps %ymm5, %ymm0, %ymm5 jmp LBB11_144 LBB11_143: 
## %cif_test_mixed1358 vxorps %ymm2, %ymm2, %ymm2 vcmpnleps %ymm2, %ymm0, %ymm0 vblendvps %ymm0, %ymm11, %ymm2, %ymm3 vblendvps %ymm1, %ymm3, %ymm11, %ymm11 vblendvps %ymm0, %ymm14, %ymm2, %ymm3 vblendvps %ymm1, %ymm3, %ymm14, %ymm14 vblendvps %ymm0, %ymm2, %ymm9, %ymm3 vblendvps %ymm1, %ymm3, %ymm9, %ymm9 vblendvps %ymm0, %ymm2, %ymm5, %ymm0 vblendvps %ymm1, %ymm0, %ymm5, %ymm5 LBB11_144: ## %cif_mask_all1466 vmovmskps %ymm11, %edx testl %edx, %edx je LBB11_145 ## BB#146: ## %cif_mask_all1466 cmpl $255, %edx vmovdqa -64(%rsp), %xmm2 ## 16-byte Reload jne LBB11_160 ## BB#147: ## %all_on.i.i6941 movq -80(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm4, 16(%r15,%rax,4) vmovdqu %xmm4, (%r15,%rax,4) movl $8, %eax jmp LBB11_148 LBB11_145: vmovdqa -64(%rsp), %xmm2 ## 16-byte Reload jmp LBB11_149 LBB11_160: ## %cif_test_mixed1499 movq -80(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rdi xorl %eax, %eax testb $1, %dl je LBB11_162 ## BB#161: ## %store.i.i6921 vmovd %xmm4, (%rdi) movl $1, %eax LBB11_162: ## %loopend.i.i6926 testb $2, %dl je LBB11_164 ## BB#163: ## %store.i.i6921.1 vpextrd $1, %xmm4, (%rdi,%rax,4) incl %eax LBB11_164: ## %loopend.i.i6926.1 testb $4, %dl je LBB11_166 ## BB#165: ## %store.i.i6921.2 cltq vpextrd $2, %xmm4, (%rdi,%rax,4) incl %eax LBB11_166: ## %loopend.i.i6926.2 testb $8, %dl je LBB11_168 ## BB#167: ## %store.i.i6921.3 cltq vpextrd $3, %xmm4, (%rdi,%rax,4) incl %eax LBB11_168: ## %loopend.i.i6926.3 testb $16, %dl je LBB11_170 ## BB#169: ## %store.i.i6921.4 cltq vmovd %xmm2, (%rdi,%rax,4) incl %eax LBB11_170: ## %loopend.i.i6926.4 testb $32, %dl je LBB11_172 ## BB#171: ## %store.i.i6921.5 cltq vpextrd $1, %xmm2, (%rdi,%rax,4) incl %eax LBB11_172: ## %loopend.i.i6926.5 testb $64, %dl je LBB11_174 ## BB#173: ## %store.i.i6921.6 cltq vpextrd $2, %xmm2, (%rdi,%rax,4) incl %eax LBB11_174: ## %loopend.i.i6926.6 testb %dl, %dl jns LBB11_148 ## BB#175: ## %store.i.i6921.7 cltq vpextrd $3, %xmm2, (%rdi,%rax,4) incl %eax LBB11_148: ## %cif_done1468 movq -80(%rsp), %rdx ## 8-byte Reload addl %edx, %eax movq %rax, -80(%rsp) ## 8-byte Spill cmpl $255, %ecx jne LBB11_122 LBB11_149: ## %cif_mask_all1575 vmovmskps %ymm14, %edx testl %edx, %edx je LBB11_153 ## BB#150: ## %cif_mask_all1575 cmpl $255, %edx jne LBB11_123 ## BB#151: ## %all_on.i.i6849 movq -96(%rsp), %rax ## 8-byte Reload cltq vextractf128 $1, %ymm4, 16(%r15,%rax,4) vmovdqu %xmm4, (%r15,%rax,4) movl $8, %eax jmp LBB11_152 LBB11_122: ## %cif_mask_mixed1576 vextractf128 $1, %ymm10, %xmm0 vextractf128 $1, %ymm8, %xmm1 vpcmpgtd %xmm0, %xmm1, %xmm0 vpcmpgtd %xmm10, %xmm8, %xmm1 vinsertf128 $1, %xmm0, %ymm1, %ymm0 vxorps %ymm1, %ymm1, %ymm1 vblendvps %ymm0, %ymm14, %ymm1, %ymm0 vmovmskps %ymm0, %edx testl %edx, %edx je LBB11_181 LBB11_123: ## %cif_test_mixed1608 movq -96(%rsp), %rax ## 8-byte Reload cltq leaq (%r15,%rax,4), %rdi xorl %eax, %eax testb $1, %dl je LBB11_125 ## BB#124: ## %store.i.i6829 vmovd %xmm4, (%rdi) movl $1, %eax LBB11_125: ## %loopend.i.i6834 testb $2, %dl je LBB11_127 ## BB#126: ## %store.i.i6829.1 vpextrd $1, %xmm4, (%rdi,%rax,4) incl %eax LBB11_127: ## %loopend.i.i6834.1 testb $4, %dl je LBB11_129 ## BB#128: ## %store.i.i6829.2 cltq vpextrd $2, %xmm4, (%rdi,%rax,4) incl %eax LBB11_129: ## %loopend.i.i6834.2 testb $8, %dl je LBB11_131 ## BB#130: ## %store.i.i6829.3 cltq vpextrd $3, %xmm4, (%rdi,%rax,4) incl %eax LBB11_131: ## %loopend.i.i6834.3 testb $16, %dl je LBB11_133 ## BB#132: ## %store.i.i6829.4 cltq vmovd %xmm2, (%rdi,%rax,4) incl %eax LBB11_133: ## %loopend.i.i6834.4 testb $32, 
	testb	$32, %dl
	je	LBB11_135
## BB#134:                              ## %store.i.i6829.5
	cltq
	vpextrd	$1, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_135:                              ## %loopend.i.i6834.5
	testb	$64, %dl
	je	LBB11_137
## BB#136:                              ## %store.i.i6829.6
	cltq
	vpextrd	$2, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_137:                              ## %loopend.i.i6834.6
	testb	%dl, %dl
	jns	LBB11_152
## BB#138:                              ## %store.i.i6829.7
	cltq
	vpextrd	$3, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_152:                              ## %cif_done1577
	movq	-96(%rsp), %rdx         ## 8-byte Reload
	addl	%edx, %eax
	movq	%rax, -96(%rsp)         ## 8-byte Spill
	cmpl	$255, %ecx
	jne	LBB11_181
LBB11_153:                              ## %cif_mask_all1684
	vmovmskps	%ymm9, %edx
	testl	%edx, %edx
	je	LBB11_157
## BB#154:                              ## %cif_mask_all1684
	cmpl	$255, %edx
	jne	LBB11_182
## BB#155:                              ## %all_on.i.i6683
	movq	-88(%rsp), %rax         ## 8-byte Reload
	cltq
	vextractf128	$1, %ymm4, 16(%r15,%rax,4)
	vmovdqu	%xmm4, (%r15,%rax,4)
	movl	$8, %eax
	jmp	LBB11_156
LBB11_181:                              ## %cif_mask_mixed1685
	vextractf128	$1, %ymm10, %xmm0
	vextractf128	$1, %ymm8, %xmm1
	vpcmpgtd	%xmm0, %xmm1, %xmm0
	vpcmpgtd	%xmm10, %xmm8, %xmm1
	vinsertf128	$1, %xmm0, %ymm1, %ymm0
	vxorps	%ymm1, %ymm1, %ymm1
	vblendvps	%ymm0, %ymm9, %ymm1, %ymm0
	vmovmskps	%ymm0, %edx
	testl	%edx, %edx
	je	LBB11_198
LBB11_182:                              ## %cif_test_mixed1717
	movq	-88(%rsp), %rax         ## 8-byte Reload
	cltq
	leaq	(%r15,%rax,4), %rdi
	xorl	%eax, %eax
	testb	$1, %dl
	je	LBB11_184
## BB#183:                              ## %store.i.i6639
	vmovd	%xmm4, (%rdi)
	movl	$1, %eax
LBB11_184:                              ## %loopend.i.i6644
	testb	$2, %dl
	je	LBB11_186
## BB#185:                              ## %store.i.i6639.1
	vpextrd	$1, %xmm4, (%rdi,%rax,4)
	incl	%eax
LBB11_186:                              ## %loopend.i.i6644.1
	testb	$4, %dl
	je	LBB11_188
## BB#187:                              ## %store.i.i6639.2
	cltq
	vpextrd	$2, %xmm4, (%rdi,%rax,4)
	incl	%eax
LBB11_188:                              ## %loopend.i.i6644.2
	testb	$8, %dl
	je	LBB11_190
## BB#189:                              ## %store.i.i6639.3
	cltq
	vpextrd	$3, %xmm4, (%rdi,%rax,4)
	incl	%eax
LBB11_190:                              ## %loopend.i.i6644.3
	testb	$16, %dl
	je	LBB11_192
## BB#191:                              ## %store.i.i6639.4
	cltq
	vmovd	%xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_192:                              ## %loopend.i.i6644.4
	testb	$32, %dl
	je	LBB11_194
## BB#193:                              ## %store.i.i6639.5
	cltq
	vpextrd	$1, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_194:                              ## %loopend.i.i6644.5
	testb	$64, %dl
	je	LBB11_196
## BB#195:                              ## %store.i.i6639.6
	cltq
	vpextrd	$2, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_196:                              ## %loopend.i.i6644.6
	testb	%dl, %dl
	jns	LBB11_156
## BB#197:                              ## %store.i.i6639.7
	cltq
	vpextrd	$3, %xmm2, (%rdi,%rax,4)
	incl	%eax
LBB11_156:                              ## %cif_done1686
	movq	-88(%rsp), %rdx         ## 8-byte Reload
	addl	%edx, %eax
	movq	%rax, -88(%rsp)         ## 8-byte Spill
	cmpl	$255, %ecx
	jne	LBB11_198
LBB11_157:                              ## %cif_mask_all1793
	vmovmskps	%ymm5, %ecx
	testl	%ecx, %ecx
	je	LBB11_216
## BB#158:                              ## %cif_mask_all1793
	cmpl	$255, %ecx
	jne	LBB11_199
## BB#159:                              ## %packed_store_active___un_3C_uni_3E_vyi.exit6535
	movq	-104(%rsp), %rcx        ## 8-byte Reload
	movslq	%ecx, %rax
	vextractf128	$1, %ymm4, 16(%r15,%rax,4)
	vmovdqu	%xmm4, (%r15,%rax,4)
	addl	$8, %ecx
	movq	%rcx, -104(%rsp)        ## 8-byte Spill
	jmp	LBB11_216
LBB11_198:                              ## %cif_mask_mixed1794
	vextractf128	$1, %ymm10, %xmm0
	vextractf128	$1, %ymm8, %xmm1
	vpcmpgtd	%xmm0, %xmm1, %xmm0
	vpcmpgtd	%xmm10, %xmm8, %xmm1
	vinsertf128	$1, %xmm0, %ymm1, %ymm0
	vxorps	%ymm1, %ymm1, %ymm1
	vblendvps	%ymm0, %ymm5, %ymm1, %ymm0
	vmovmskps	%ymm0, %ecx
	testl	%ecx, %ecx
	je	LBB11_216
LBB11_199:                              ## %cif_test_mixed1826
	movq	-104(%rsp), %rax        ## 8-byte Reload
	cltq
	leaq	(%r15,%rax,4), %rdx
	xorl	%eax, %eax
	testb	$1, %cl
	je	LBB11_201
## BB#200:                              ## %store.i.i6473
	vmovd	%xmm4, (%rdx)
	movl	$1, %eax
LBB11_201:                              ## %loopend.i.i6478
	testb	$2, %cl
	je	LBB11_203
## BB#202:                              ## %store.i.i6473.1
	vpextrd	$1, %xmm4, (%rdx,%rax,4)
	incl	%eax
LBB11_203:                              ## %loopend.i.i6478.1
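## [annotation added in editing; not compiler output]
## Final compaction store, driven by %cl (mask bits derived from %ymm5)
## with its running count spilled at -104(%rsp). Once all four masks have
## been flushed, the %foreach_reset epilogue below writes the four lane
## counts out through %r10 and restores the callee-saved registers.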
	testb	$4, %cl
	je	LBB11_205
## BB#204:                              ## %store.i.i6473.2
	cltq
	vpextrd	$2, %xmm4, (%rdx,%rax,4)
	incl	%eax
LBB11_205:                              ## %loopend.i.i6478.2
	testb	$8, %cl
	je	LBB11_207
## BB#206:                              ## %store.i.i6473.3
	cltq
	vpextrd	$3, %xmm4, (%rdx,%rax,4)
	incl	%eax
LBB11_207:                              ## %loopend.i.i6478.3
	testb	$16, %cl
	je	LBB11_209
## BB#208:                              ## %store.i.i6473.4
	cltq
	vmovd	%xmm2, (%rdx,%rax,4)
	incl	%eax
LBB11_209:                              ## %loopend.i.i6478.4
	testb	$32, %cl
	je	LBB11_211
## BB#210:                              ## %store.i.i6473.5
	cltq
	vpextrd	$1, %xmm2, (%rdx,%rax,4)
	incl	%eax
LBB11_211:                              ## %loopend.i.i6478.5
	testb	$64, %cl
	je	LBB11_213
## BB#212:                              ## %store.i.i6473.6
	cltq
	vpextrd	$2, %xmm2, (%rdx,%rax,4)
	incl	%eax
LBB11_213:                              ## %loopend.i.i6478.6
	testb	%cl, %cl
	jns	LBB11_215
## BB#214:                              ## %store.i.i6473.7
	cltq
	vpextrd	$3, %xmm2, (%rdx,%rax,4)
	incl	%eax
LBB11_215:                              ## %packed_store_active___un_3C_uni_3E_vyi.exit6428
	movq	-104(%rsp), %rcx        ## 8-byte Reload
	addl	%ecx, %eax
	movq	%rax, -104(%rsp)        ## 8-byte Spill
LBB11_216:                              ## %foreach_reset
	movq	-80(%rsp), %rax         ## 8-byte Reload
	movl	%eax, (%r10)
	movl	312(%rsp), %eax
	movq	-96(%rsp), %rcx         ## 8-byte Reload
	subl	%eax, %ecx
	movl	%ecx, 4(%r10)
	movq	-88(%rsp), %rax         ## 8-byte Reload
	subl	-72(%rsp), %eax         ## 4-byte Folded Reload
	movl	%eax, 8(%r10)
	movq	-104(%rsp), %rax        ## 8-byte Reload
	subl	-68(%rsp), %eax         ## 4-byte Folded Reload
	movl	%eax, 12(%r10)
	addq	$200, %rsp
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	vzeroupper
	retq

.subsections_via_symbols