|
36 | 36 |
|
37 | 37 | #include "image.h"
|
38 | 38 | #include "uvg_math.h"
|
39 |
| -#include "mip_data.h" |
40 | 39 | #include "rdo.h"
|
41 | 40 | #include "search.h"
|
42 | 41 | #include "search_intra.h"
|
@@ -86,17 +85,6 @@ static const uint8_t num_ref_pixels_left[16][16] = {
|
86 | 85 | { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }
|
87 | 86 | };
|
88 | 87 |
|
89 |
| - |
90 |
| -static void mip_predict( |
91 |
| - const encoder_state_t* const state, |
92 |
| - const uvg_intra_references* const refs, |
93 |
| - const uint16_t pred_block_width, |
94 |
| - const uint16_t pred_block_height, |
95 |
| - uvg_pixel* dst, |
96 |
| - const int mip_mode, |
97 |
| - const bool mip_transp); |
98 |
| - |
99 |
| - |
100 | 88 | int8_t uvg_intra_get_dir_luma_predictor(
|
101 | 89 | const uint32_t x,
|
102 | 90 | const uint32_t y,
|
@@ -646,298 +634,6 @@ uint8_t uvg_get_mip_flag_context(
|
646 | 634 | }
|
647 | 635 |
|
648 | 636 |
|
649 |
/**
 * \brief Reduce one MIP reference boundary to dst_len samples.
 *
 * When dst_len < src_len, every output sample is built from
 * src_len / dst_len consecutive input samples: they are summed, a rounding
 * offset is added and the sum is shifted right by floor(log2(factor)).
 * Otherwise the first dst_len input samples are copied unchanged.
 *
 * \param reduced_dst  Output buffer, must have room for dst_len samples.
 * \param ref_src      Input boundary samples.
 * \param src_len      Length of the input boundary.
 * \param dst_len      Number of samples to produce. Values <= 0 produce nothing.
 */
void uvg_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len)
{
  // Nothing to produce. This also keeps src_len / dst_len below from
  // dividing by zero.
  if (dst_len <= 0) {
    return;
  }

  if (dst_len >= src_len) {
    // Copy boundary if no downsampling is needed
    for (int i = 0; i < dst_len; ++i) {
      reduced_dst[i] = ref_src[i];
    }
    return;
  }

  // Create reduced boundary by downsampling.
  // Plain int is used for the factor and the counters so that they cannot
  // truncate or wrap when compared against the int lengths.
  const int down_smp_factor = src_len / dst_len;
  const int log2_factor = uvg_math_floor_log2(down_smp_factor);
  // A factor of 1 (e.g. 3 -> 2 samples) gives log2_factor == 0; shifting by
  // -1 would be undefined, and no rounding is needed in that case anyway.
  const int rounding_offset = (log2_factor > 0) ? (1 << (log2_factor - 1)) : 0;

  int src_idx = 0;
  for (int dst_idx = 0; dst_idx < dst_len; dst_idx++) {
    int sum = 0;
    for (int k = 0; k < down_smp_factor; k++) {
      sum += ref_src[src_idx++];
    }
    reduced_dst[dst_idx] = (sum + rounding_offset) >> log2_factor;
  }
}
678 |
| - |
679 |
| - |
680 |
| -void uvg_mip_reduced_pred(int* const output, |
681 |
| - const int* const input, |
682 |
| - const uint8_t* matrix, |
683 |
| - const bool transpose, |
684 |
| - const int red_bdry_size, |
685 |
| - const int red_pred_size, |
686 |
| - const int size_id, |
687 |
| - const int in_offset, |
688 |
| - const int in_offset_tr) |
689 |
| -{ |
690 |
| - const int input_size = 2 * red_bdry_size; |
691 |
| - |
692 |
| - // Use local buffer for transposed result |
693 |
| - int out_buf_transposed[LCU_WIDTH * LCU_WIDTH]; |
694 |
| - int* const out_ptr = transpose ? out_buf_transposed : output; |
695 |
| - |
696 |
| - int sum = 0; |
697 |
| - for (int i = 0; i < input_size; i++) { |
698 |
| - sum += input[i]; |
699 |
| - } |
700 |
| - const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum; |
701 |
| - assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four"); |
702 |
| - |
703 |
| - const uint8_t* weight = matrix; |
704 |
| - const int input_offset = transpose ? in_offset_tr : in_offset; |
705 |
| - |
706 |
| - const bool red_size = (size_id == 2); |
707 |
| - int pos_res = 0; |
708 |
| - for (int y = 0; y < red_pred_size; y++) { |
709 |
| - for (int x = 0; x < red_pred_size; x++) { |
710 |
| - if (red_size) { |
711 |
| - weight -= 1; |
712 |
| - } |
713 |
| - int tmp0 = red_size ? 0 : (input[0] * weight[0]); |
714 |
| - int tmp1 = input[1] * weight[1]; |
715 |
| - int tmp2 = input[2] * weight[2]; |
716 |
| - int tmp3 = input[3] * weight[3]; |
717 |
| - for (int i = 4; i < input_size; i += 4) { |
718 |
| - tmp0 += input[i] * weight[i]; |
719 |
| - tmp1 += input[i + 1] * weight[i + 1]; |
720 |
| - tmp2 += input[i + 2] * weight[i + 2]; |
721 |
| - tmp3 += input[i + 3] * weight[i + 3]; |
722 |
| - } |
723 |
| - out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset); |
724 |
| - pos_res++; |
725 |
| - weight += input_size; |
726 |
| - } |
727 |
| - } |
728 |
| - |
729 |
| - if (transpose) { |
730 |
| - for (int y = 0; y < red_pred_size; y++) { |
731 |
| - for (int x = 0; x < red_pred_size; x++) { |
732 |
| - output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y]; |
733 |
| - } |
734 |
| - } |
735 |
| - } |
736 |
| -} |
737 |
| - |
738 |
| - |
739 |
/**
 * \brief Upsample a reduced prediction along one dimension by linear
 *        interpolation.
 *
 * Each line starts from a boundary sample. Between every pair of neighbouring
 * samples, ups_factor output samples are written; the last of them lands on
 * the position of the later source sample.
 *
 * \param dst                Output; the first sample of each line is written
 *                           at dst + line * dst_stride.
 * \param src                Reduced prediction samples.
 * \param boundary           Boundary samples; line n starts from
 *                           boundary[(n + 1) * boundary_step - 1].
 * \param src_size_ups_dim   Source samples per line along the upsampled dimension.
 * \param src_size_orth_dim  Number of lines.
 * \param src_step           Distance between source samples within a line.
 * \param src_stride         Distance between source lines.
 * \param dst_step           Distance between output samples within a line.
 * \param dst_stride         Distance between output lines.
 * \param boundary_step      Distance between the boundary samples of two lines.
 * \param ups_factor         Upsampling factor; expected to be a power of two >= 2.
 */
void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary,
                                const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim,
                                const uint16_t src_step, const uint16_t src_stride,
                                const uint16_t dst_step, const uint16_t dst_stride,
                                const uint16_t boundary_step,
                                const uint16_t ups_factor)
{
  assert(ups_factor >= 2 && "Upsampling factor must be at least 2.");
  if (ups_factor == 0) {
    // With asserts compiled out: nothing would be written, so do not take
    // the logarithm of zero either.
    return;
  }

  const int log2_factor = uvg_math_floor_log2(ups_factor);
  // Guarded so that a factor of 1 cannot turn into a shift by -1 when asserts
  // are compiled out.
  const int rounding_offset = (log2_factor > 0) ? (1 << (log2_factor - 1)) : 0;

  const int* src_line = src;
  int* dst_line = dst;
  const int* boundary_line = boundary + boundary_step - 1;

  for (uint16_t line = 0; line < src_size_orth_dim; line++) {
    const int* before = boundary_line;
    const int* behind = src_line;
    int* cur_dst = dst_line;

    for (uint16_t sample = 0; sample < src_size_ups_dim; sample++) {
      // Multiplication instead of a left shift keeps this defined for
      // negative sample values.
      int scaled_before = (*before) * (1 << log2_factor);
      int scaled_behind = 0;

      // *before and *behind are deliberately re-read on every step, exactly
      // like the original loop, so the access order is unchanged if dst and
      // src share storage.
      // pos is an int so that pos <= ups_factor cannot wrap around.
      for (int pos = 1; pos <= ups_factor; pos++) {
        scaled_before -= *before;
        scaled_behind += *behind;
        *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor;

        cur_dst += dst_step;
      }

      before = behind;
      behind += src_step;
    }

    src_line += src_stride;
    dst_line += dst_stride;
    boundary_line += boundary_step;
  }
}
786 |
| - |
787 |
| - |
788 |
| - |
789 |
| -/** \brief Matrix weighted intra prediction. |
790 |
| -*/ |
791 |
| -static void mip_predict( |
792 |
| - const encoder_state_t* const state, |
793 |
| - const uvg_intra_references* const refs, |
794 |
| - const uint16_t pred_block_width, |
795 |
| - const uint16_t pred_block_height, |
796 |
| - uvg_pixel* dst, |
797 |
| - const int mip_mode, |
798 |
| - const bool mip_transp) |
799 |
| -{ |
800 |
| - // MIP prediction uses int values instead of uvg_pixel as some temp values may be negative |
801 |
| - |
802 |
| - uvg_pixel* out = dst; |
803 |
| - int result[32*32] = {0}; |
804 |
| - const int mode_idx = mip_mode; |
805 |
| - |
806 |
| - // *** INPUT PREP *** |
807 |
| - |
808 |
| - // Initialize prediction parameters START |
809 |
| - uint16_t width = pred_block_width; |
810 |
| - uint16_t height = pred_block_height; |
811 |
| - |
812 |
| - int size_id; // Prediction block type |
813 |
| - if (width == 4 && height == 4) { |
814 |
| - size_id = 0; |
815 |
| - } |
816 |
| - else if (width == 4 || height == 4 || (width == 8 && height == 8)) { |
817 |
| - size_id = 1; |
818 |
| - } |
819 |
| - else { |
820 |
| - size_id = 2; |
821 |
| - } |
822 |
| - |
823 |
| - // Reduced boundary and prediction sizes |
824 |
| - int red_bdry_size = (size_id == 0) ? 2 : 4; |
825 |
| - int red_pred_size = (size_id < 2) ? 4 : 8; |
826 |
| - |
827 |
| - // Upsampling factors |
828 |
| - uint16_t ups_hor_factor = width / red_pred_size; |
829 |
| - uint16_t ups_ver_factor = height / red_pred_size; |
830 |
| - |
831 |
| - // Upsampling factors must be powers of two |
832 |
| - assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two."); |
833 |
| - assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two."); |
834 |
| - |
835 |
| - // Initialize prediction parameters END |
836 |
| - |
837 |
| - int ref_samples_top[INTRA_REF_LENGTH]; |
838 |
| - int ref_samples_left[INTRA_REF_LENGTH]; |
839 |
| - |
840 |
| - for (int i = 1; i < INTRA_REF_LENGTH; i++) { |
841 |
| - ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init |
842 |
| - ref_samples_left[i-1] = (int)refs->ref.left[i]; |
843 |
| - } |
844 |
| - |
845 |
| - // Compute reduced boundary with Haar-downsampling |
846 |
| - const int input_size = 2 * red_bdry_size; |
847 |
| - |
848 |
| - int red_bdry[MIP_MAX_INPUT_SIZE]; |
849 |
| - int red_bdry_trans[MIP_MAX_INPUT_SIZE]; |
850 |
| - |
851 |
| - int* const top_reduced = &red_bdry[0]; |
852 |
| - int* const left_reduced = &red_bdry[red_bdry_size]; |
853 |
| - |
854 |
| - uvg_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size); |
855 |
| - uvg_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size); |
856 |
| - |
857 |
| - // Transposed reduced boundaries |
858 |
| - int* const left_reduced_trans = &red_bdry_trans[0]; |
859 |
| - int* const top_reduced_trans = &red_bdry_trans[red_bdry_size]; |
860 |
| - |
861 |
| - for (int x = 0; x < red_bdry_size; x++) { |
862 |
| - top_reduced_trans[x] = top_reduced[x]; |
863 |
| - } |
864 |
| - for (int y = 0; y < red_bdry_size; y++) { |
865 |
| - left_reduced_trans[y] = left_reduced[y]; |
866 |
| - } |
867 |
| - |
868 |
| - int input_offset = red_bdry[0]; |
869 |
| - int input_offset_trans = red_bdry_trans[0]; |
870 |
| - |
871 |
| - const bool has_first_col = (size_id < 2); |
872 |
| - // First column of matrix not needed for large blocks |
873 |
| - red_bdry[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0; |
874 |
| - red_bdry_trans[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0; |
875 |
| - |
876 |
| - for (int i = 1; i < input_size; ++i) { |
877 |
| - red_bdry[i] -= input_offset; |
878 |
| - red_bdry_trans[i] -= input_offset_trans; |
879 |
| - } |
880 |
| - |
881 |
| - // *** INPUT PREP *** END |
882 |
| - |
883 |
| - // *** BLOCK PREDICT *** |
884 |
| - |
885 |
| - const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1); |
886 |
| - const bool transpose = mip_transp; |
887 |
| - |
888 |
| - const uint8_t* matrix; |
889 |
| - switch (size_id) { |
890 |
| - case 0: |
891 |
| - matrix = &uvg_mip_matrix_4x4[mode_idx][0][0]; |
892 |
| - break; |
893 |
| - case 1: |
894 |
| - matrix = &uvg_mip_matrix_8x8[mode_idx][0][0]; |
895 |
| - break; |
896 |
| - case 2: |
897 |
| - matrix = &uvg_mip_matrix_16x16[mode_idx][0][0]; |
898 |
| - break; |
899 |
| - default: |
900 |
| - assert(false && "Invalid MIP size id."); |
901 |
| - } |
902 |
| - |
903 |
| - // Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8 |
904 |
| - int red_pred_buffer[8*8]; |
905 |
| - int* const reduced_pred = need_upsampling ? red_pred_buffer : result; |
906 |
| - |
907 |
| - const int* const reduced_bdry = transpose ? red_bdry_trans : red_bdry; |
908 |
| - |
909 |
| - uvg_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans); |
910 |
| - if (need_upsampling) { |
911 |
| - const int* ver_src = reduced_pred; |
912 |
| - uint16_t ver_src_step = width; |
913 |
| - |
914 |
| - if (ups_hor_factor > 1) { |
915 |
| - int* const hor_dst = result + (ups_ver_factor - 1) * width; |
916 |
| - ver_src = hor_dst; |
917 |
| - ver_src_step *= ups_ver_factor; |
918 |
| - |
919 |
| - uvg_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left, |
920 |
| - red_pred_size, red_pred_size, |
921 |
| - 1, red_pred_size, 1, ver_src_step, |
922 |
| - ups_ver_factor, ups_hor_factor); |
923 |
| - } |
924 |
| - |
925 |
| - if (ups_ver_factor > 1) { |
926 |
| - uvg_mip_pred_upsampling_1D(result, ver_src, ref_samples_top, |
927 |
| - red_pred_size, width, |
928 |
| - ver_src_step, 1, width, 1, |
929 |
| - 1, ups_ver_factor); |
930 |
| - } |
931 |
| - } |
932 |
| - |
933 |
| - // Assign and cast values from temp array to output |
934 |
| - for (int i = 0; i < 32 * 32; i++) { |
935 |
| - out[i] = (uvg_pixel)result[i]; |
936 |
| - } |
937 |
| - // *** BLOCK PREDICT *** END |
938 |
| -} |
939 |
| - |
940 |
| - |
941 | 637 | int8_t uvg_wide_angle_correction(
|
942 | 638 | int_fast8_t mode,
|
943 | 639 | const int log2_width,
|
@@ -1618,7 +1314,7 @@ void uvg_intra_predict(
|
1618 | 1314 | if (intra_mode < 68) {
|
1619 | 1315 | if (use_mip) {
|
1620 | 1316 | assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
|
1621 |
| - mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed); |
| 1317 | + uvg_mip_predict(refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed); |
1622 | 1318 | }
|
1623 | 1319 | else {
|
1624 | 1320 | intra_predict_regular(state, refs, &data->pred_cu, cu_loc, pu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode);
|
@@ -1804,7 +1500,7 @@ static void intra_recon_tb_leaf(
|
1804 | 1500 |
|
1805 | 1501 | uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
|
1806 | 1502 |
|
1807 |
| - uvg_pixel pred[32 * 32]; |
| 1503 | + ALIGNED(32) uvg_pixel pred[32 * 32]; |
1808 | 1504 | uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu);
|
1809 | 1505 |
|
1810 | 1506 | const int index = lcu_px.x + lcu_px.y * lcu_width;
|
|
0 commit comments