Skip to content
Snippets Groups Projects
Commit 76f5542c authored by Juzhe-Zhong's avatar Juzhe-Zhong Committed by Pan Li
Browse files

RISC-V: Make dynamic LMUL cost model more accurate for conversion codes

Notice that the current dynamic LMUL cost model is not accurate for conversion codes.
Refine it for such cases; with this patch, an existing case changes from choosing LMUL = 4 to LMUL = 8.

Tested no regression, committed.

Before this patch (LMUL = 4):                  After this patch (LMUL = 8):
        lw      a7,56(sp)                             lw	a7,56(sp)
        ld      t5,0(sp)                              ld	t5,0(sp)
        ld      t1,8(sp)                              ld	t1,8(sp)
        ld      t6,16(sp)                             ld	t6,16(sp)
        ld      t0,24(sp)                             ld	t0,24(sp)
        ld      t3,32(sp)                             ld	t3,32(sp)
        ld      t4,40(sp)                             ld	t4,40(sp)
        ble     a7,zero,.L5                           ble	a7,zero,.L5
.L3:                                               .L3:
        vsetvli a4,a7,e32,m2,ta,ma                    vsetvli	a4,a7,e32,m4,ta
        vle8.v  v1,0(a2)                              vle8.v	v3,0(a2)
        vle8.v  v4,0(a1)                              vle8.v	v16,0(t0)
        vsext.vf4       v8,v1                         vle8.v	v7,0(a1)
        vsext.vf4       v2,v4                         vle8.v	v12,0(t6)
        vsetvli zero,zero,e8,mf2,ta,ma                vle8.v	v2,0(a5)
        vadd.vv v4,v4,v1                              vle8.v	v1,0(t5)
        vsetvli zero,zero,e32,m2,ta,ma                vsext.vf4	v20,v3
        vle8.v  v5,0(t0)                              vsext.vf4	v8,v7
        vle8.v  v6,0(t6)                              vadd.vv	v8,v8,v20
        vadd.vv v2,v2,v8                              vadd.vv	v8,v8,v8
        vadd.vv v2,v2,v2                              vadd.vv	v8,v8,v20
        vadd.vv v2,v2,v8                              vsetvli	zero,zero,e8,m1
        vsetvli zero,zero,e8,mf2,ta,ma                vadd.vv	v15,v12,v16
        vadd.vv v6,v6,v5                              vsetvli	zero,zero,e32,m4
        vsetvli zero,zero,e32,m2,ta,ma                vsext.vf4	v12,v15
        vle8.v  v8,0(t5)                              vadd.vv	v8,v8,v12
        vle8.v  v9,0(a5)                              vsetvli	zero,zero,e8,m1
        vsext.vf4       v10,v4                        vadd.vv	v7,v7,v3
        vsext.vf4       v12,v6                        vsetvli	zero,zero,e32,m4
        vadd.vv v2,v2,v12                             vsext.vf4	v4,v7
        vadd.vv v2,v2,v10                             vadd.vv	v8,v8,v4
        vsetvli zero,zero,e16,m1,ta,ma                vsetvli	zero,zero,e16,m2
        vncvt.x.x.w     v4,v2                         vncvt.x.x.w	v4,v8
        vsetvli zero,zero,e32,m2,ta,ma                vsetvli	zero,zero,e8,m1
        vadd.vv v6,v2,v2                              vncvt.x.x.w	v4,v4
        vsetvli zero,zero,e8,mf2,ta,ma                vadd.vv	v15,v3,v4
        vncvt.x.x.w     v4,v4                         vadd.vv	v2,v2,v4
        vadd.vv v5,v5,v4                              vse8.v	v15,0(t4)
        vadd.vv v9,v9,v4                              vadd.vv	v3,v16,v4
        vadd.vv v1,v1,v4                              vse8.v	v2,0(a3)
        vadd.vv v4,v8,v4                              vadd.vv	v1,v1,v4
        vse8.v  v1,0(t4)                              vse8.v	v1,0(a6)
        vse8.v  v9,0(a3)                              vse8.v	v3,0(t1)
        vsetvli zero,zero,e32,m2,ta,ma                vsetvli	zero,zero,e32,m4
        vse8.v  v4,0(a6)                              vsext.vf4	v4,v3
        vsext.vf4       v8,v5                         vadd.vv	v4,v4,v8
        vse8.v  v5,0(t1)                              vsetvli	zero,zero,e64,m8
        vadd.vv v2,v8,v2                              vsext.vf2	v16,v4
        vsetvli zero,zero,e64,m4,ta,ma                vse64.v	v16,0(t3)
        vsext.vf2       v8,v2                         vsetvli	zero,zero,e32,m4
        vsetvli zero,zero,e32,m2,ta,ma                vadd.vv	v8,v8,v8
        slli    t2,a4,3                               vsext.vf4	v4,v15
        vse64.v v8,0(t3)                              slli	t2,a4,3
        vsext.vf4       v2,v1                         vadd.vv	v4,v8,v4
        sub     a7,a7,a4                              sub	a7,a7,a4
        vadd.vv v2,v6,v2                              vsetvli	zero,zero,e64,m8
        vsetvli zero,zero,e64,m4,ta,ma                vsext.vf2	v8,v4
        vsext.vf2       v4,v2                         vse64.v	v8,0(a0)
        vse64.v v4,0(a0)                              add	a1,a1,a4
        add     a2,a2,a4                              add	a2,a2,a4
        add     a1,a1,a4                              add	a5,a5,a4
        add     t6,t6,a4                              add	t5,t5,a4
        add     t0,t0,a4                              add	t6,t6,a4
        add     a5,a5,a4                              add	t0,t0,a4
        add     t5,t5,a4                              add	t4,t4,a4
        add     t4,t4,a4                              add	a3,a3,a4
        add     a3,a3,a4                              add	a6,a6,a4
        add     a6,a6,a4                              add	t1,t1,a4
        add     t1,t1,a4                              add	t3,t3,t2
        add     t3,t3,t2                              add	a0,a0,t2
        add     a0,a0,t2                              bne	a7,zero,.L3
        bne     a7,zero,.L3                         .L5:
.L5:                                                  ret
        ret

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (is_gimple_assign_or_call): Change interface.
	(get_live_range): New function.

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adapt test.
	* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Ditto.
parent fb57e402
No related branches found
No related tags found
No related merge requests found
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment