mirror of
				https://github.com/nyanmisaka/ffmpeg-rockchip.git
				synced 2025-10-31 04:26:37 +08:00 
			
		
		
		
	 4a963ee698
			
		
	
	4a963ee698
	
	
	
		
			
			1.the codes are compatible with O32 ABI 2.use uld and mtc1 to workaround cpu 3A2000 gslwlc1 bug (gslwlc1 instruction extension bug in O32 ABI) Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
		
			
				
	
	
		
			1258 lines
		
	
	
		
			67 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1258 lines
		
	
	
		
			67 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Loongson SIMD optimized qpeldsp
 | |
|  *
 | |
|  * Copyright (c) 2016 Loongson Technology Corporation Limited
 | |
|  * Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 | |
|  *
 | |
|  * This file is part of FFmpeg.
 | |
|  *
 | |
|  * FFmpeg is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * FFmpeg is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with FFmpeg; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | |
|  */
 | |
| 
 | |
| #include "hpeldsp_mips.h"
 | |
| #include "libavcodec/bit_depth_template.c"
 | |
| #include "libavutil/mips/asmdefs.h"
 | |
| #include "constants.h"
 | |
| 
 | |
| void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[2];
 | |
|     mips_reg addr[2];
 | |
|     uint64_t low32;
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[pixels])                         \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[pixels])                         \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [low32]"=&r"(low32),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[2];
 | |
|     mips_reg addr[2];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[4];
 | |
|     mips_reg addr[2];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr1],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x0f(%[pixels])                         \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x08(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x08(%[addr0])                          \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         "sdc1       %[ftmp2],   0x08(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp3],   0x08(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x0f(%[pixels])                         \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x08(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x08(%[addr0])                          \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         "sdc1       %[ftmp2],   0x08(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp3],   0x08(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr1]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr1]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[4];
 | |
|     mips_reg addr[3];
 | |
|     uint64_t low32;
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[pixels])                         \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[block])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[pixels])                         \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "uld        %[low32],   0x00(%[block])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),
 | |
|           [low32]"=&r"(low32),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[4];
 | |
|     mips_reg addr[3];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
|     double ftmp[8];
 | |
|     mips_reg addr[3];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[line_size],   %[line_size]            \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x0f(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x08(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x08(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp6],   0x0f(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp6],   0x08(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp7],   0x0f(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp7],   0x08(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         "sdc1       %[ftmp4],   0x08(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp5],   0x08(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[pixels],      %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x0f(%[pixels])                         \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[block],       %[line_size]            \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x08(%[pixels])                         \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x08(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp6],   0x0f(%[block])                          \n\t"
 | |
|         "gsldrc1    %[ftmp6],   0x08(%[block])                          \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp7],   0x0f(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp7],   0x08(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[block],  %[line_size])           \n\t"
 | |
|         "sdc1       %[ftmp4],   0x08(%[block])                          \n\t"
 | |
|         "gssdxc1    %[ftmp5],   0x08(%[block],  %[line_size])           \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[addr2]                \n\t"
 | |
|         PTR_ADDU   "%[block],   %[block],       %[addr2]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 | |
|           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),
 | |
|           [block]"+&r"(block),              [pixels]"+&r"(pixels),
 | |
|           [h]"+&r"(h)
 | |
|         : [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| inline void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[4];
 | |
|     mips_reg addr[5];
 | |
|     uint64_t low32;
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[src1])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[src2])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[src1])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[src2])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),
 | |
|           [low32]"=&r"(low32),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| inline void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[4];
 | |
|     mips_reg addr[5];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| inline void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[8];
 | |
|     mips_reg addr[5];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x0f(%[src1])                           \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x08(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x08(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp6],   0x0f(%[src2])                           \n\t"
 | |
|         "gsldrc1    %[ftmp6],   0x08(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp7],   0x0f(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp7],   0x08(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         "sdc1       %[ftmp4],   0x08(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp5],   0x08(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x0f(%[src1])                           \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x08(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x0f(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x08(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp6],   0x0f(%[src2])                           \n\t"
 | |
|         "gsldrc1    %[ftmp6],   0x08(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp7],   0x0f(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp7],   0x08(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "pavgb      %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         "sdc1       %[ftmp4],   0x08(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp5],   0x08(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 | |
|           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| inline void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[6];
 | |
|     mips_reg addr[6];
 | |
|     uint64_t low32;
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[src1])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[src2])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
 | |
|         "uld        %[low32],   0x00(%[dst])                            \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp4]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr5])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp5]                                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[src1])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp0]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr0])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp1]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[src2])                           \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp2]                                \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr1])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp3]                                \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
 | |
|         "uld        %[low32],   0x00(%[dst])                            \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp4]                                \n\t"
 | |
|         "uld        %[low32],   0x00(%[addr5])                          \n\t"
 | |
|         "mtc1       %[low32],   %[ftmp5]                                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
 | |
|         "swc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gsswxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
 | |
|           [low32]"=&r"(low32),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
| inline void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[6];
 | |
|     mips_reg addr[6];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x07(%[dst])                            \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x00(%[dst])                            \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x07(%[addr5])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x00(%[addr5])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         PTR_ADDU   "%[addr5],   %[dst],         %[dst_stride]           \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x07(%[dst])                            \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x00(%[dst])                            \n\t"
 | |
|         "gsldlc1    %[ftmp5],   0x07(%[addr5])                          \n\t"
 | |
|         "gsldrc1    %[ftmp5],   0x00(%[addr5])                          \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),            [addr5]"=&r"(addr[5]),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
/*
 * 16-pixel-wide counterpart of ff_avg_pixels8_l2_8_mmi(): the block is
 * processed as two independent 8-pixel-wide columns (byte offsets 0 and 8),
 * each over all h rows and with the same strides.
 */
inline void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
    const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
    int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_avg_pixels8_l2_8_mmi(dst + col, src1 + col, src2 + col,
                                dst_stride, src_stride1, src_stride2, h);
    }
}
 | |
| 
 | |
/*
 * 4-pixel-wide "x2" put: forwards to ff_put_pixels4_l2_8_mmi() with the
 * source row and the same row shifted right by one pixel as the two inputs.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_put_pixels4_l2_8_mmi(block, pixels, right,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 8-pixel-wide "x2" put: forwards to ff_put_pixels8_l2_8_mmi() with the
 * source row and the same row shifted right by one pixel as the two inputs.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_put_pixels8_l2_8_mmi(block, pixels, right,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "x2" put: writes to block the pavgb average computed by
 * ff_put_pixels16_l2_8_mmi() of each source row and the same row shifted
 * right by one pixel.  line_size is used as the stride of the destination
 * and of both inputs.
 */
void ff_put_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_put_pixels16_l2_8_mmi(block, pixels, right,
                             line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 4-pixel-wide "x2" avg: ff_avg_pixels4_l2_8_mmi() averages each source row
 * with the same row shifted right by one pixel and then averages that with
 * what is already stored in block.  line_size is used as the stride of the
 * destination and of both inputs.
 */
void ff_avg_pixels4_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_avg_pixels4_l2_8_mmi(block, pixels, right,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 8-pixel-wide "x2" avg: ff_avg_pixels8_l2_8_mmi() averages each source row
 * with the same row shifted right by one pixel and then averages that with
 * what is already stored in block.  line_size is used as the stride of the
 * destination and of both inputs.
 */
void ff_avg_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_avg_pixels8_l2_8_mmi(block, pixels, right,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "x2" avg, done as two 8-pixel-wide columns (byte offsets 0
 * and 8) handed to ff_avg_pixels8_x2_8_mmi().
 */
void ff_avg_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_avg_pixels8_x2_8_mmi(block + col, pixels + col, line_size, h);
    }
}
 | |
| 
 | |
| inline void ff_put_no_rnd_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1,
 | |
|     const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
 | |
|     int h)
 | |
| {
 | |
|     double ftmp[5];
 | |
|     mips_reg addr[5];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         "pcmpeqb    %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
 | |
|         PTR_ADDU   "%[addr2],   %[src_stride1], %[src_stride1]          \n\t"
 | |
|         PTR_ADDU   "%[addr3],   %[src_stride2], %[src_stride2]          \n\t"
 | |
|         PTR_ADDU   "%[addr4],   %[dst_stride],  %[dst_stride]           \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "xor        %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "xor        %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[src1])                           \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[src1],        %[src_stride1]          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[src1])                           \n\t"
 | |
|         "gsldlc1    %[ftmp1],   0x07(%[addr0])                          \n\t"
 | |
|         "gsldrc1    %[ftmp1],   0x00(%[addr0])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[src2])                           \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[src2],        %[src_stride2]          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[src2])                           \n\t"
 | |
|         "gsldlc1    %[ftmp3],   0x07(%[addr1])                          \n\t"
 | |
|         PTR_ADDU   "%[src1],    %[src1],        %[addr2]                \n\t"
 | |
|         "gsldrc1    %[ftmp3],   0x00(%[addr1])                          \n\t"
 | |
|         "xor        %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
 | |
|         "pavgb      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "pavgb      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "xor        %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "xor        %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
 | |
|         "sdc1       %[ftmp0],   0x00(%[dst])                            \n\t"
 | |
|         "gssdxc1    %[ftmp1],   0x00(%[dst],    %[dst_stride])          \n\t"
 | |
|         PTR_ADDU   "%[src2],    %[src2],        %[addr3]                \n\t"
 | |
|         PTR_ADDU   "%[dst],     %[dst],         %[addr4]                \n\t"
 | |
| 
 | |
|         PTR_ADDI   "%[h],       %[h],           -0x04                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [addr2]"=&r"(addr[2]),            [addr3]"=&r"(addr[3]),
 | |
|           [addr4]"=&r"(addr[4]),
 | |
|           [dst]"+&r"(dst),                  [src1]"+&r"(src1),
 | |
|           [src2]"+&r"(src2),                [h]"+&r"(h)
 | |
|         : [dst_stride]"r"((mips_reg)dst_stride),
 | |
|           [src_stride1]"r"((mips_reg)src_stride1),
 | |
|           [src_stride2]"r"((mips_reg)src_stride2)
 | |
|         : "memory"
 | |
|     );
 | |
| }
 | |
| 
 | |
/*
 * 8-pixel-wide "x2" put without rounding: ff_put_no_rnd_pixels8_l2_8_mmi()
 * combines each source row with the same row shifted right by one pixel.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_no_rnd_pixels8_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *right = pixels + 1;  /* horizontal neighbour */

    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, right,
                                   line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "x2" put without rounding, done as two 8-pixel-wide columns
 * (byte offsets 0 and 8) handed to ff_put_no_rnd_pixels8_x2_8_mmi().
 */
void ff_put_no_rnd_pixels16_x2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_put_no_rnd_pixels8_x2_8_mmi(block + col, pixels + col,
                                       line_size, h);
    }
}
 | |
| 
 | |
/*
 * 4-pixel-wide "y2" put: forwards to ff_put_pixels4_l2_8_mmi() with each
 * source row and the row one line_size further down as the two inputs.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_put_pixels4_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 8-pixel-wide "y2" put: forwards to ff_put_pixels8_l2_8_mmi() with each
 * source row and the row one line_size further down as the two inputs.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_put_pixels8_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "y2" put: writes to block the pavgb average computed by
 * ff_put_pixels16_l2_8_mmi() of each source row and the row one line_size
 * further down.  line_size is used as the stride of the destination and of
 * both inputs.
 */
void ff_put_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_put_pixels16_l2_8_mmi(block, pixels, below,
                             line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 4-pixel-wide "y2" avg: ff_avg_pixels4_l2_8_mmi() averages each source row
 * with the row one line_size further down and then averages that with what
 * is already stored in block.  line_size is used as the stride of the
 * destination and of both inputs.
 */
void ff_avg_pixels4_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_avg_pixels4_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 8-pixel-wide "y2" avg: ff_avg_pixels8_l2_8_mmi() averages each source row
 * with the row one line_size further down and then averages that with what
 * is already stored in block.  line_size is used as the stride of the
 * destination and of both inputs.
 */
void ff_avg_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_avg_pixels8_l2_8_mmi(block, pixels, below,
                            line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "y2" avg, done as two 8-pixel-wide columns (byte offsets 0
 * and 8) handed to ff_avg_pixels8_y2_8_mmi().
 */
void ff_avg_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_avg_pixels8_y2_8_mmi(block + col, pixels + col, line_size, h);
    }
}
 | |
| 
 | |
/*
 * 8-pixel-wide "y2" put without rounding: ff_put_no_rnd_pixels8_l2_8_mmi()
 * combines each source row with the row one line_size further down.
 * line_size is used as the stride of the destination and of both inputs.
 */
void ff_put_no_rnd_pixels8_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    const uint8_t *below = pixels + line_size;  /* vertical neighbour */

    ff_put_no_rnd_pixels8_l2_8_mmi(block, pixels, below,
                                   line_size, line_size, line_size, h);
}
 | |
| 
 | |
/*
 * 16-pixel-wide "y2" put without rounding, done as two 8-pixel-wide columns
 * (byte offsets 0 and 8) handed to ff_put_no_rnd_pixels8_y2_8_mmi().
 */
void ff_put_no_rnd_pixels16_y2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8) {
        ff_put_no_rnd_pixels8_y2_8_mmi(block + col, pixels + col,
                                       line_size, h);
    }
}
 | |
| 
 | |
/*
 * 4-pixel-wide "xy2" put in plain C: every output byte is
 * (p[0] + p[1] + p[line_size] + p[line_size + 1] + 2) >> 2.
 *
 * Four pixels are processed at once inside a 32-bit word.  For each source
 * row the horizontal pair sum is kept in two parts so that no byte lane can
 * carry into its neighbour:
 *   lo = sum of the low 2 bits of both pixels
 *   hi = sum of the high 6 bits of both pixels, pre-shifted right by 2
 * The +2 rounding bias is folded into the "lo" part of every other row
 * (rows 0, 2, 4, ...), so each vertical pair of rows carries it exactly once.
 *
 * Rows are produced two per loop pass; h + 1 source rows are read.
 */
void ff_put_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    uint32_t left, right;           /* row at x and at x + 1 */
    uint32_t lo_biased, hi_biased;  /* pair sums of the row carrying the +2 */
    uint32_t lo_plain, hi_plain;    /* pair sums of the row in between */
    int row;

    left      = AV_RN32(pixels);
    right     = AV_RN32(pixels + 1);
    lo_biased = (left  & 0x03030303UL) +
                (right & 0x03030303UL) +
                         0x02020202UL;
    hi_biased = ((left  & 0xFCFCFCFCUL) >> 2) +
                ((right & 0xFCFCFCFCUL) >> 2);
    pixels   += line_size;

    for (row = 0; row < h; row += 2) {
        /* even output row: previous (biased) row + this (plain) row */
        left     = AV_RN32(pixels);
        right    = AV_RN32(pixels + 1);
        lo_plain = (left  & 0x03030303UL) +
                   (right & 0x03030303UL);
        hi_plain = ((left  & 0xFCFCFCFCUL) >> 2) +
                   ((right & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = hi_biased + hi_plain +
                                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block  += line_size;

        /* odd output row: previous (plain) row + this (biased) row */
        left      = AV_RN32(pixels);
        right     = AV_RN32(pixels + 1);
        lo_biased = (left  & 0x03030303UL) +
                    (right & 0x03030303UL) +
                             0x02020202UL;
        hi_biased = ((left  & 0xFCFCFCFCUL) >> 2) +
                    ((right & 0xFCFCFCFCUL) >> 2);
        *((uint32_t *) block) = hi_biased + hi_plain +
                                (((lo_biased + lo_plain) >> 2) & 0x0F0F0F0FUL);
        pixels += line_size;
        block  += line_size;
    }
}
 | |
| 
 | |
| void ff_put_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
 | |
|     ptrdiff_t line_size, int h)
 | |
| {
 | |
| #if 1
 | |
|     double ftmp[10];
 | |
|     mips_reg addr[2];
 | |
| 
 | |
|     __asm__ volatile (
 | |
|         "xor        %[ftmp7],   %[ftmp7],       %[ftmp7]                \n\t"
 | |
|         "dli        %[addr0],   0x0f                                    \n\t"
 | |
|         "pcmpeqw    %[ftmp6],   %[ftmp6],       %[ftmp6]                \n\t"
 | |
|         "dmtc1      %[addr0],   %[ftmp8]                                \n\t"
 | |
|         "dli        %[addr0],   0x01                                    \n\t"
 | |
|         "psrlh      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
 | |
|         "dmtc1      %[addr0],   %[ftmp8]                                \n\t"
 | |
|         "psllh      %[ftmp6],   %[ftmp6],       %[ftmp8]                \n\t"
 | |
| 
 | |
|         "dli        %[addr0],   0x02                                    \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[pixels])                         \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[pixels])                         \n\t"
 | |
|         "dmtc1      %[addr0],   %[ftmp9]                                \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x08(%[pixels])                         \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x01(%[pixels])                         \n\t"
 | |
|         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
 | |
|         "mov.d      %[ftmp5],   %[ftmp4]                                \n\t"
 | |
|         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
 | |
|         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "paddush    %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
 | |
|         "paddush    %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
 | |
|         "xor        %[addr0],   %[addr0],       %[addr0]                \n\t"
 | |
|         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
 | |
|         ".p2align   3                                                   \n\t"
 | |
|         "1:                                                             \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[pixels],      %[addr0]                \n\t"
 | |
|         "gsldlc1    %[ftmp0],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp0],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x08(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x01(%[addr1])                          \n\t"
 | |
|         "mov.d      %[ftmp1],   %[ftmp0]                                \n\t"
 | |
|         "mov.d      %[ftmp3],   %[ftmp2]                                \n\t"
 | |
|         "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp7]                \n\t"
 | |
|         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp1],   %[ftmp1],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
 | |
|         "paddush    %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
 | |
|         "paddush    %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
 | |
|         "paddush    %[ftmp4],   %[ftmp4],       %[ftmp6]                \n\t"
 | |
|         "paddush    %[ftmp5],   %[ftmp5],       %[ftmp6]                \n\t"
 | |
|         "paddush    %[ftmp4],   %[ftmp4],       %[ftmp0]                \n\t"
 | |
|         "paddush    %[ftmp5],   %[ftmp5],       %[ftmp1]                \n\t"
 | |
|         "psrlh      %[ftmp4],   %[ftmp4],       %[ftmp9]                \n\t"
 | |
|         "psrlh      %[ftmp5],   %[ftmp5],       %[ftmp9]                \n\t"
 | |
|         "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
 | |
|         "gssdxc1    %[ftmp4],   0x00(%[block],  %[addr0])               \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[addr0],       %[line_size]            \n\t"
 | |
|         PTR_ADDU   "%[addr1],   %[pixels],      %[addr0]                \n\t"
 | |
|         "gsldlc1    %[ftmp2],   0x07(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp2],   0x00(%[addr1])                          \n\t"
 | |
|         "gsldlc1    %[ftmp4],   0x08(%[addr1])                          \n\t"
 | |
|         "gsldrc1    %[ftmp4],   0x01(%[addr1])                          \n\t"
 | |
|         "mov.d      %[ftmp3],   %[ftmp2]                                \n\t"
 | |
|         "mov.d      %[ftmp5],   %[ftmp4]                                \n\t"
 | |
|         "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp7]                \n\t"
 | |
|         "punpcklbh  %[ftmp4],   %[ftmp4],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp3],   %[ftmp3],       %[ftmp7]                \n\t"
 | |
|         "punpckhbh  %[ftmp5],   %[ftmp5],       %[ftmp7]                \n\t"
 | |
|         "paddush    %[ftmp4],   %[ftmp4],       %[ftmp2]                \n\t"
 | |
|         "paddush    %[ftmp5],   %[ftmp5],       %[ftmp3]                \n\t"
 | |
|         "paddush    %[ftmp0],   %[ftmp0],       %[ftmp6]                \n\t"
 | |
|         "paddush    %[ftmp1],   %[ftmp1],       %[ftmp6]                \n\t"
 | |
|         "paddush    %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
 | |
|         "paddush    %[ftmp1],   %[ftmp1],       %[ftmp5]                \n\t"
 | |
|         "psrlh      %[ftmp0],   %[ftmp0],       %[ftmp9]                \n\t"
 | |
|         "psrlh      %[ftmp1],   %[ftmp1],       %[ftmp9]                \n\t"
 | |
|         "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
 | |
|         "gssdxc1    %[ftmp0],   0x00(%[block],  %[addr0])               \n\t"
 | |
|         PTR_ADDU   "%[addr0],   %[addr0],       %[line_size]            \n\t"
 | |
|         PTR_ADDU   "%[h],       %[h],           -0x02                   \n\t"
 | |
|         "bnez       %[h],       1b                                      \n\t"
 | |
|         : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
 | |
|           [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
 | |
|           [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
 | |
|           [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
 | |
|           [ftmp8]"=&f"(ftmp[8]),            [ftmp9]"=&f"(ftmp[9]),
 | |
|           [addr0]"=&r"(addr[0]),            [addr1]"=&r"(addr[1]),
 | |
|           [h]"+&r"(h),                      [pixels]"+&r"(pixels)
 | |
|         : [block]"r"(block),                [line_size]"r"((mips_reg)line_size)
 | |
|         : "memory"
 | |
|     );
 | |
| #else
 | |
|     /* FIXME HIGH BIT DEPTH */
 | |
|     int j;
 | |
| 
 | |
|     for (j = 0; j < 2; j++) {
 | |
|         int i;
 | |
|         const uint32_t a = AV_RN32(pixels);
 | |
|         const uint32_t b = AV_RN32(pixels + 1);
 | |
|         uint32_t l0 = (a & 0x03030303UL) +
 | |
|                       (b & 0x03030303UL) +
 | |
|                            0x02020202UL;
 | |
|         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +
 | |
|                       ((b & 0xFCFCFCFCUL) >> 2);
 | |
|         uint32_t l1, h1;
 | |
| 
 | |
|         pixels += line_size;
 | |
|         for (i = 0; i < h; i += 2) {
 | |
|             uint32_t a = AV_RN32(pixels);
 | |
|             uint32_t b = AV_RN32(pixels + 1);
 | |
|             l1 = (a & 0x03030303UL) +
 | |
|                  (b & 0x03030303UL);
 | |
|             h1 = ((a & 0xFCFCFCFCUL) >> 2) +
 | |
|                  ((b & 0xFCFCFCFCUL) >> 2);
 | |
|             *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
 | |
|             pixels += line_size;
 | |
|             block  += line_size;
 | |
|             a  = AV_RN32(pixels);
 | |
|             b  = AV_RN32(pixels + 1);
 | |
|             l0 = (a & 0x03030303UL) +
 | |
|                  (b & 0x03030303UL) +
 | |
|                       0x02020202UL;
 | |
|             h0 = ((a & 0xFCFCFCFCUL) >> 2) +
 | |
|                  ((b & 0xFCFCFCFCUL) >> 2);
 | |
|             *((uint32_t *) block) = h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL);
 | |
|             pixels += line_size;
 | |
|             block  += line_size;
 | |
|         }
 | |
|         pixels += 4 - line_size * (h + 1);
 | |
|         block  += 4 - line_size * h;
 | |
|     }
 | |
| #endif
 | |
| }
 | |
| 
 | |
/**
 * 16-pixel-wide variant of ff_put_pixels8_xy2_8_mmi(): the block is handled
 * as two independent 8-column halves, left half first.
 *
 * @param block     destination, forwarded at column offsets 0 and 8
 * @param pixels    source, forwarded at column offsets 0 and 8
 * @param line_size byte stride, forwarded unchanged
 * @param h         row count, forwarded unchanged
 */
void ff_put_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_pixels8_xy2_8_mmi(block + col, pixels + col, line_size, h);
}
 | |
| 
 | |
/**
 * Combine a 4-pixel-wide destination block with the rounded 2x2 (half-pel in
 * both x and y) interpolation of the source.
 *
 * The interpolated value of each pixel is
 *     (p[x] + p[x + 1] + q[x] + q[x + 1] + 2) >> 2
 * with p and q vertically adjacent source rows. Four pixels are handled at
 * once inside one 32-bit word: the low 2 bits and the high 6 bits of every
 * byte are summed separately so that no carry crosses a byte boundary. The
 * result is merged with the existing destination word through rnd_avg32().
 *
 * @param block     destination; one 32-bit word is read and rewritten per row
 * @param pixels    source; every row is read with AV_RN32 at offsets 0 and 1
 * @param line_size byte stride applied to both block and pixels
 * @param h         row count; rows are emitted two at a time while any remain
 */
void ff_avg_pixels4_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    const uint32_t lo_mask  = 0x03030303UL; /* low 2 bits of every byte      */
    const uint32_t hi_mask  = 0xFCFCFCFCUL; /* high 6 bits of every byte     */
    const uint32_t quo_mask = 0x0F0F0F0FUL; /* drops bits shifted in from the
                                             * neighbouring byte             */
    const uint32_t bias     = 0x02020202UL; /* +2 per byte for rounding      */
    uint32_t left, right;
    uint32_t lo_biased, hi_biased;          /* row sums carrying the bias    */
    uint32_t lo_plain, hi_plain;            /* row sums without the bias     */
    uint32_t *dst;
    int rows_left;

    /* Prime the pipeline with the first source row. */
    left      = AV_RN32(pixels);
    right     = AV_RN32(pixels + 1);
    lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
    hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
    pixels   += line_size;

    for (rows_left = h; rows_left > 0; rows_left -= 2) {
        /* Even output row: biased sums come from the row above. */
        left     = AV_RN32(pixels);
        right    = AV_RN32(pixels + 1);
        lo_plain = (left & lo_mask) + (right & lo_mask);
        hi_plain = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
        dst      = (uint32_t *) block;
        *dst     = rnd_avg32(*dst, hi_biased + hi_plain +
                                   (((lo_biased + lo_plain) >> 2) & quo_mask));
        pixels  += line_size;
        block   += line_size;

        /* Odd output row: biased sums are refreshed from the row below. */
        left      = AV_RN32(pixels);
        right     = AV_RN32(pixels + 1);
        lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
        hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
        dst       = (uint32_t *) block;
        *dst      = rnd_avg32(*dst, hi_biased + hi_plain +
                                    (((lo_biased + lo_plain) >> 2) & quo_mask));
        pixels   += line_size;
        block    += line_size;
    }
}
 | |
| 
 | |
/**
 * Combine an 8-pixel-wide destination block with the rounded 2x2 (half-pel
 * in both x and y) interpolation of the source.
 *
 * The block is processed as two 4-column strips, one 32-bit word per row.
 * Per pixel the interpolated value is
 *     (p[x] + p[x + 1] + q[x] + q[x + 1] + 2) >> 2
 * computed by summing the low 2 bits and high 6 bits of every byte
 * separately so that no carry crosses a byte boundary; the result is merged
 * with the existing destination word through rnd_avg32().
 *
 * @param block     destination; read and rewritten one 32-bit word at a time
 * @param pixels    source; every row is read with AV_RN32 at offsets 0 and 1
 * @param line_size byte stride applied to both block and pixels
 * @param h         row count; rows are emitted two at a time while any remain
 */
void ff_avg_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    const uint32_t lo_mask  = 0x03030303UL; /* low 2 bits of every byte      */
    const uint32_t hi_mask  = 0xFCFCFCFCUL; /* high 6 bits of every byte     */
    const uint32_t quo_mask = 0x0F0F0F0FUL; /* drops bits shifted in from the
                                             * neighbouring byte             */
    const uint32_t bias     = 0x02020202UL; /* +2 per byte for rounding      */
    int strip;

    for (strip = 0; strip < 2; strip++) {
        uint32_t left, right;
        uint32_t lo_biased, hi_biased;      /* row sums carrying the bias    */
        uint32_t lo_plain, hi_plain;        /* row sums without the bias     */
        uint32_t *dst;
        int rows_left;

        /* Prime the pipeline with the strip's first source row. */
        left      = AV_RN32(pixels);
        right     = AV_RN32(pixels + 1);
        lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
        hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
        pixels   += line_size;

        for (rows_left = h; rows_left > 0; rows_left -= 2) {
            /* Even output row: biased sums come from the row above. */
            left     = AV_RN32(pixels);
            right    = AV_RN32(pixels + 1);
            lo_plain = (left & lo_mask) + (right & lo_mask);
            hi_plain = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
            dst      = (uint32_t *) block;
            *dst     = rnd_avg32(*dst, hi_biased + hi_plain +
                                       (((lo_biased + lo_plain) >> 2) & quo_mask));
            pixels  += line_size;
            block   += line_size;

            /* Odd output row: biased sums are refreshed from the row below. */
            left      = AV_RN32(pixels);
            right     = AV_RN32(pixels + 1);
            lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
            hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
            dst       = (uint32_t *) block;
            *dst      = rnd_avg32(*dst, hi_biased + hi_plain +
                                        (((lo_biased + lo_plain) >> 2) & quo_mask));
            pixels   += line_size;
            block    += line_size;
        }

        /* Move 4 columns right and rewind to the top row for the next strip
         * (the source was advanced one extra row by the priming step). */
        pixels += 4 - line_size * (h + 1);
        block  += 4 - line_size * h;
    }
}
 | |
| 
 | |
/**
 * 16-pixel-wide variant of ff_avg_pixels8_xy2_8_mmi(): the block is handled
 * as two independent 8-column halves, left half first.
 *
 * @param block     destination, forwarded at column offsets 0 and 8
 * @param pixels    source, forwarded at column offsets 0 and 8
 * @param line_size byte stride, forwarded unchanged
 * @param h         row count, forwarded unchanged
 */
void ff_avg_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_avg_pixels8_xy2_8_mmi(block + col, pixels + col, line_size, h);
}
 | |
| 
 | |
/**
 * Write an 8-pixel-wide block holding the 2x2 (half-pel in both x and y)
 * interpolation of the source, using the "no rounding" bias of 1.
 *
 * The block is processed as two 4-column strips, one 32-bit word per row.
 * Per pixel the stored value is
 *     (p[x] + p[x + 1] + q[x] + q[x + 1] + 1) >> 2
 * computed by summing the low 2 bits and high 6 bits of every byte
 * separately so that no carry crosses a byte boundary.
 *
 * @param block     destination; written one 32-bit word at a time
 * @param pixels    source; every row is read with AV_RN32 at offsets 0 and 1
 * @param line_size byte stride applied to both block and pixels
 * @param h         row count; rows are emitted two at a time while any remain
 */
void ff_put_no_rnd_pixels8_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    /* FIXME HIGH BIT DEPTH */
    const uint32_t lo_mask  = 0x03030303UL; /* low 2 bits of every byte      */
    const uint32_t hi_mask  = 0xFCFCFCFCUL; /* high 6 bits of every byte     */
    const uint32_t quo_mask = 0x0F0F0F0FUL; /* drops bits shifted in from the
                                             * neighbouring byte             */
    const uint32_t bias     = 0x01010101UL; /* +1 per byte: no-rnd variant   */
    int strip;

    for (strip = 0; strip < 2; strip++) {
        uint32_t left, right;
        uint32_t lo_biased, hi_biased;      /* row sums carrying the bias    */
        uint32_t lo_plain, hi_plain;        /* row sums without the bias     */
        int rows_left;

        /* Prime the pipeline with the strip's first source row. */
        left      = AV_RN32(pixels);
        right     = AV_RN32(pixels + 1);
        lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
        hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
        pixels   += line_size;

        for (rows_left = h; rows_left > 0; rows_left -= 2) {
            /* Even output row: biased sums come from the row above. */
            left     = AV_RN32(pixels);
            right    = AV_RN32(pixels + 1);
            lo_plain = (left & lo_mask) + (right & lo_mask);
            hi_plain = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
            *((uint32_t *) block) = hi_biased + hi_plain +
                                    (((lo_biased + lo_plain) >> 2) & quo_mask);
            pixels  += line_size;
            block   += line_size;

            /* Odd output row: biased sums are refreshed from the row below. */
            left      = AV_RN32(pixels);
            right     = AV_RN32(pixels + 1);
            lo_biased = (left & lo_mask) + (right & lo_mask) + bias;
            hi_biased = ((left & hi_mask) >> 2) + ((right & hi_mask) >> 2);
            *((uint32_t *) block) = hi_biased + hi_plain +
                                    (((lo_biased + lo_plain) >> 2) & quo_mask);
            pixels   += line_size;
            block    += line_size;
        }

        /* Move 4 columns right and rewind to the top row for the next strip
         * (the source was advanced one extra row by the priming step). */
        pixels += 4 - line_size * (h + 1);
        block  += 4 - line_size * h;
    }
}
 | |
| 
 | |
/**
 * 16-pixel-wide variant of ff_put_no_rnd_pixels8_xy2_8_mmi(): the block is
 * handled as two independent 8-column halves, left half first.
 *
 * @param block     destination, forwarded at column offsets 0 and 8
 * @param pixels    source, forwarded at column offsets 0 and 8
 * @param line_size byte stride, forwarded unchanged
 * @param h         row count, forwarded unchanged
 */
void ff_put_no_rnd_pixels16_xy2_8_mmi(uint8_t *block, const uint8_t *pixels,
    ptrdiff_t line_size, int h)
{
    int col;

    for (col = 0; col < 16; col += 8)
        ff_put_no_rnd_pixels8_xy2_8_mmi(block + col, pixels + col, line_size, h);
}
 |