#pragma GCC optimize ("Ofast")
#include<bits/stdc++.h>
using namespace std;
// Cursor into the working-memory arena: walloc1d() hands out storage starting
// at this address (by default) and then moves it past the storage it handed out.
void * wmem;
// Static 96,000,000-byte buffer; dummy_main() points `wmem` at its start.
char memarr[ 96000000 ] ;
/// Returns the smaller of `a` and `b`, expressed in the first argument's type `S`.
/// When the two compare equal (`a <= b`), `a` is chosen.
template <class S, class T>
inline S min_L(S a, T b) {
  const bool first_wins = (a <= b);
  // A conditional expression is used on purpose: both operands are brought to
  // their common type first and only then converted to S.
  return first_wins ? a : b;
}
/// Returns the larger of `a` and `b`, expressed in the first argument's type `S`.
/// When the two compare equal (`a >= b`), `a` is chosen.
template <class S, class T>
inline S max_L(S a, T b) {
  const bool first_wins = (a >= b);
  // A conditional expression is used on purpose: both operands are brought to
  // their common type first and only then converted to S.
  return first_wins ? a : b;
}
/// Carves an array of `x` elements of type T out of a bump-allocated arena.
///
/// @param arr  receives the address of the first element.
/// @param x    number of elements to reserve.
/// @param mem  arena cursor; it is first rounded up to a 16-byte boundary and
///             afterwards left pointing just past the reserved elements.
///             Defaults to the file-level cursor `wmem`.
///
/// No bounds check against the arena's capacity is performed.
template <class T>
inline void walloc1d(T **arr, int x, void **mem = &wmem) {
  // Bytes needed to reach the next multiple of 16 (0 when already aligned).
  const unsigned long long address = reinterpret_cast<unsigned long long>(*mem);
  const unsigned long long padding = (16 - (address & 15)) & 15;

  char *aligned = static_cast<char*>(*mem) + padding;
  *arr = static_cast<T*>(static_cast<void*>(aligned));
  *mem = (*arr) + x;
}
/// Allocates an x-by-y matrix of T as one contiguous block plus a table of
/// row pointers, so that `(*arr)[i][j]` addresses row i, column j.
///
/// @param arr  receives the row-pointer table; release it with free2d()
///             (i.e. free((*arr)[0]) followed by free(*arr)).
/// @param x    number of rows; values <= 0 are treated as 0.
/// @param y    number of columns; values <= 0 are treated as 0.
///
/// On allocation failure `*arr` is set to a null pointer.
/// Elements are left uninitialised.
template <class T>
void malloc2d(T ***arr, int x, int y) {
  // Do the size arithmetic in size_t: `x * y` as int can overflow for large grids.
  const std::size_t rows = x > 0 ? static_cast<std::size_t>(x) : 0;
  const std::size_t cols = y > 0 ? static_cast<std::size_t>(y) : 0;

  // Always reserve at least one row-pointer slot: slot 0 holds the data block
  // even for a zero-row matrix, so writing it must never be out of bounds.
  const std::size_t slots = rows > 0 ? rows : 1;
  T **table = static_cast<T**>(std::malloc(slots * sizeof(T*)));
  if (table == nullptr) {
    *arr = nullptr;
    return;
  }

  T *block = static_cast<T*>(std::malloc(rows * cols * sizeof(T)));
  if (block == nullptr && rows * cols > 0) {
    std::free(table);
    *arr = nullptr;
    return;
  }

  table[0] = block;
  for (std::size_t i = 1; i < rows; ++i) {
    table[i] = table[i - 1] + cols;
  }
  *arr = table;
}
/// Releases a matrix laid out as a row-pointer table whose slot 0 owns the
/// contiguous data block (the layout malloc2d() produces).
///
/// @param arr  row-pointer table; must be non-null because `arr[0]` is read.
template <class T>
void free2d(T **arr) {
  T *const block = arr[0];  // the data block hangs off the first row pointer
  std::free(block);
  std::free(arr);
}
/// Indexed binary min-heap over node ids 0..N-1, keyed by `val[node]`.
///
/// A node's key can be lowered while it is queued (`change`), and a node that
/// has been popped is marked visited and ignores all later `change` calls.
/// Storage comes either from the C heap (`malloc`/`free`) or from the bump
/// arena (`walloc`); `init` must be called before first use.
template <class T>
struct DijkstraHeap {
  int *hp;        // heap array of node ids; hp[0] has the smallest key
  int *place;     // place[node] = index of node inside hp, or -1 when not queued
  int size;       // number of ids currently stored in hp
  char *visited;  // visited[node] becomes 1 once the node has been popped
  T *val;         // key of each node

  /// Allocates the four arrays for N nodes on the C heap; pair with free().
  void malloc(int N) {
    hp = static_cast<int*>(std::malloc(N * sizeof(int)));
    place = static_cast<int*>(std::malloc(N * sizeof(int)));
    visited = static_cast<char*>(std::malloc(N * sizeof(char)));
    val = static_cast<T*>(std::malloc(N * sizeof(T)));
  }

  /// Releases arrays obtained through malloc().
  void free() {
    std::free(hp);
    std::free(place);
    std::free(visited);
    std::free(val);
  }

  /// Takes the four arrays for N nodes from the arena addressed by `mem`.
  void walloc(int N, void **mem = &wmem) {
    walloc1d(&hp, N, mem);
    walloc1d(&place, N, mem);
    walloc1d(&visited, N, mem);
    walloc1d(&val, N, mem);
  }

  /// Empties the heap and marks all N nodes as unqueued and unvisited.
  void init(int N) {
    size = 0;
    for (int node = 0; node < N; ++node) {
      place[node] = -1;
      visited[node] = 0;
    }
  }

  /// Sifts the entry at heap index n towards the root while its parent's key is larger.
  void up(int n) {
    while (n != 0) {
      const int parent = (n - 1) / 2;
      if (val[hp[parent]] <= val[hp[n]]) {
        return;
      }
      std::swap(hp[parent], hp[n]);
      // hp was swapped first, so these now name the two moved nodes.
      std::swap(place[hp[parent]], place[hp[n]]);
      n = parent;
    }
  }

  /// Sifts the entry at heap index n towards the leaves while a child's key is smaller.
  void down(int n) {
    for (int child = 2 * n + 1; child < size; child = 2 * n + 1) {
      // Pick the smaller-keyed of the two children.
      if (child + 1 < size && val[hp[child]] > val[hp[child + 1]]) {
        ++child;
      }
      if (val[hp[child]] >= val[hp[n]]) {
        return;
      }
      std::swap(hp[child], hp[n]);
      std::swap(place[hp[child]], place[hp[n]]);
      n = child;
    }
  }

  /// Offers key v for node n: inserts the node if it is not queued, or lowers
  /// its key if v is strictly smaller. Does nothing for visited nodes or when
  /// the queued key is already <= v.
  void change(int n, T v) {
    if (visited[n]) {
      return;
    }
    const bool queued = place[n] >= 0;
    if (queued && val[n] <= v) {
      return;
    }
    val[n] = v;
    if (place[n] == -1) {
      place[n] = size;
      hp[size] = n;
      ++size;
    }
    up(place[n]);
  }

  /// Removes the node with the smallest key, marks it visited and returns its id.
  /// The heap must not be empty.
  int pop(void) {
    const int top = hp[0];
    place[top] = -1;
    --size;
    if (size != 0) {
      // Move the last entry to the root and restore heap order.
      hp[0] = hp[size];
      place[hp[0]] = 0;
      down(0);
    }
    visited[top] = 1;
    return top;
  }
};
/// Dense r-by-c grid of T with optional derived tables:
///  - `d_s`: 2-D prefix sums (setSum / getSum),
///  - `up`, `dw`, `lf`, `rg`: run lengths in the four directions
///    (setDir / setDirMatch),
///  - weighted 4-neighbour shortest paths (getDist4).
///
/// Storage is managed manually: call malloc() before use and free() afterwards.
template <class T>
struct Grid2d {
  int r;      // number of rows
  int c;      // number of columns
  T **d;      // cell values, d[row][col]
  int set_s;  // non-zero once d_s has been allocated
  int set_d;  // non-zero once up/dw/lf/rg have been allocated
  T **d_s;    // (r+1) x (c+1) prefix sums; d_s[i][j] = sum of d[0..i-1][0..j-1]
  int **up;   // run length ending at the cell, extending towards row 0
  int **dw;   // run length starting at the cell, extending towards row r-1
  int **lf;   // run length ending at the cell, extending towards column 0
  int **rg;   // run length starting at the cell, extending towards column c-1

  /// Sets the dimensions to rr x cc and allocates the (uninitialised) cell storage.
  void malloc(const int rr, const int cc) {
    r = rr;
    c = cc;
    set_s = 0;
    set_d = 0;
    malloc2d(&d, r, c);
  }

  /// Releases the cell storage and whichever derived tables were allocated.
  /// The allocation flags are cleared so stale tables are never reused or
  /// released a second time.
  void free(void) {
    free2d(d);
    if (set_s) {
      free2d(d_s);
      set_s = 0;
    }
    if (set_d) {
      free2d(up);
      free2d(dw);
      free2d(lf);
      free2d(rg);
      set_d = 0;
    }
  }

  /// Row access: `grid[a][b]` is the cell in row a, column b.
  T* operator[](int a) {
    return d[a];
  }

  /// (Re)builds the prefix-sum table from the current cell values.
  void setSum(void) {
    if (set_s == 0) {
      set_s = 1;
      malloc2d(&d_s, r + 1, c + 1);
    }
    for (int i = 0; i <= r; ++i) {
      d_s[i][0] = 0;
    }
    for (int j = 0; j <= c; ++j) {
      d_s[0][j] = 0;
    }
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        d_s[i + 1][j + 1] = d_s[i][j + 1] + d_s[i + 1][j] - d_s[i][j] + d[i][j];
      }
    }
  }

  /// Allocates the four direction tables on first use.
  void ensureDirTables(void) {
    if (set_d == 0) {
      set_d = 1;
      malloc2d(&up, r, c);
      malloc2d(&dw, r, c);
      malloc2d(&lf, r, c);
      malloc2d(&rg, r, c);
    }
  }

  /// Fills up/dw/lf/rg with the length of the run of equal values that
  /// contains each cell and extends in the given direction (the cell itself
  /// counts, so every entry is >= 1).
  ///
  /// Border cells are handled inside the loops, so nothing outside the
  /// r x c grid is indexed even when r or c is 0.
  void setDir(void) {
    ensureDirTables();
    // Forward sweep: up and lf depend on the previous row / column.
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        up[i][j] = (i > 0 && d[i][j] == d[i - 1][j]) ? 1 + up[i - 1][j] : 1;
        lf[i][j] = (j > 0 && d[i][j] == d[i][j - 1]) ? 1 + lf[i][j - 1] : 1;
      }
    }
    // Backward sweep: dw and rg depend on the next row / column.
    for (int i = r - 1; i >= 0; --i) {
      for (int j = c - 1; j >= 0; --j) {
        dw[i][j] = (i + 1 < r && d[i][j] == d[i + 1][j]) ? 1 + dw[i + 1][j] : 1;
        rg[i][j] = (j + 1 < c && d[i][j] == d[i][j + 1]) ? 1 + rg[i][j + 1] : 1;
      }
    }
  }

  /// Fills up/dw/lf/rg with the number of consecutive cells equal to `v`
  /// starting at each cell and extending in the given direction; cells that
  /// differ from `v` get 0.
  ///
  /// Border cells are handled inside the loops, so nothing outside the
  /// r x c grid is indexed even when r or c is 0.
  void setDirMatch(const T v) {
    ensureDirTables();
    // Forward sweep: up and lf depend on the previous row / column.
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        if (d[i][j] == v) {
          up[i][j] = 1 + (i > 0 ? up[i - 1][j] : 0);
          lf[i][j] = 1 + (j > 0 ? lf[i][j - 1] : 0);
        } else {
          up[i][j] = 0;
          lf[i][j] = 0;
        }
      }
    }
    // Backward sweep: dw and rg depend on the next row / column.
    for (int i = r - 1; i >= 0; --i) {
      for (int j = c - 1; j >= 0; --j) {
        if (d[i][j] == v) {
          dw[i][j] = 1 + (i + 1 < r ? dw[i + 1][j] : 0);
          rg[i][j] = 1 + (j + 1 < c ? rg[i][j + 1] : 0);
        } else {
          dw[i][j] = 0;
          rg[i][j] = 0;
        }
      }
    }
  }

  /// Sum of the cells in rows r1..r2 and columns c1..c2 (all bounds inclusive).
  /// Requires a prior setSum(); the bounds are not range-checked.
  inline T getSum(const int r1, const int c1, const int r2, const int c2) {
    return d_s[r2 + 1][c2 + 1] - d_s[r1][c2 + 1] - d_s[r2 + 1][c1] + d_s[r1][c1];
  }

  /// Cheapest path cost from (sr, sc) to every cell using 4-neighbour moves.
  ///
  /// Entering a cell costs its value `d[cell]`; the start cell's own value is
  /// included. Cells with a negative value cannot be entered.
  ///
  /// @param sr, sc  start cell.
  /// @param res     r x c output table; receives the cost, or -1 for cells that
  ///                cannot be reached.
  /// @param mem     start of scratch space for the heap. It is a by-value copy
  ///                of the arena cursor, so the heap is carved from `mem` and
  ///                the caller's cursor is not advanced; the scratch space is
  ///                reusable as soon as the call returns.
  template <class S>
  inline void getDist4(int sr, int sc, S **res, void *mem = wmem) {
    // Neighbour offsets in the order: up, down, left, right.
    const int dr[4] = {-1, 1, 0, 0};
    const int dc[4] = {0, 0, -1, 1};

    DijkstraHeap<S> hp;
    // Allocate from the local copy `mem`; using the default here would advance
    // the global cursor permanently on every call.
    hp.walloc(r * c, &mem);
    hp.init(r * c);

    if (d[sr][sc] >= 0) {
      hp.change(sr * c + sc, d[sr][sc]);
    }
    while (hp.size) {
      const int k = hp.pop();
      const int i = k / c;
      const int j = k % c;
      for (int t = 0; t < 4; ++t) {
        const int ni = i + dr[t];
        const int nj = j + dc[t];
        if (ni < 0 || ni >= r || nj < 0 || nj >= c) {
          continue;
        }
        if (!(d[ni][nj] >= 0)) {
          continue;  // blocked cell
        }
        hp.change(ni * c + nj, hp.val[k] + d[ni][nj]);
      }
    }

    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        const int k = i * c + j;
        if (hp.visited[k]) {
          res[i][j] = hp.val[k];
        } else {
          res[i][j] = -1;
        }
      }
    }
  }
};
// Arena set-up routine (the cLay source's `main`, emitted under the name
// dummy_main): rewinds the arena cursor `wmem` to the start of `memarr`.
// Always returns 0.
int dummy_main() {
  wmem = memarr;
  return 0;
}
// File-scope working variables declared by the cLay source (`int x, y;`).
// NOTE(review): intended as the row count (x) and column count (y) of the
// matrix being processed — confirm against Solution::matrixBlockSum.
int x;
int y;
// File-scope working grid declared by the cLay source (`Grid2d<int> g;`).
Grid2d< int > g;
/// LeetCode-style entry point for "Matrix Block Sum".
class Solution {
 public:
  /// For every cell (i, j) returns the sum of all mat[r][c] with
  /// i-K <= r <= i+K and j-K <= c <= j+K that lie inside the matrix.
  ///
  /// @param mat  rectangular matrix (every row as long as row 0); not modified.
  /// @param K    block radius. A negative K describes an empty block, so every
  ///             sum is 0.
  /// @return     matrix of the same shape as `mat` holding the block sums;
  ///             empty when `mat` is empty.
  ///
  /// Works on local state only (2-D prefix sums held in std::vector), so
  /// storage is released automatically on every exit path and the method is
  /// re-entrant.
  std::vector<std::vector<int>> matrixBlockSum(std::vector<std::vector<int>>& mat, int K) {
    const int rows = static_cast<int>(mat.size());
    // Guard the mat[0] access: an empty matrix has no first row.
    const int cols = rows > 0 ? static_cast<int>(mat[0].size()) : 0;

    std::vector<std::vector<int>> res(rows, std::vector<int>(cols, 0));
    if (rows == 0 || cols == 0 || K < 0) {
      return res;
    }

    // prefix[i][j] = sum of mat[0..i-1][0..j-1]; row 0 and column 0 stay 0.
    std::vector<std::vector<long long>> prefix(rows + 1, std::vector<long long>(cols + 1, 0));
    for (int i = 0; i < rows; ++i) {
      for (int j = 0; j < cols; ++j) {
        prefix[i + 1][j + 1] = prefix[i][j + 1] + prefix[i + 1][j] - prefix[i][j] + mat[i][j];
      }
    }

    // A radius beyond the larger dimension already covers the whole matrix;
    // capping it keeps i + reach from overflowing for huge K.
    const int reach = std::min(K, std::max(rows, cols));

    for (int i = 0; i < rows; ++i) {
      const int top = std::max(0, i - reach);
      const int bottom = std::min(rows - 1, i + reach) + 1;  // exclusive
      for (int j = 0; j < cols; ++j) {
        const int left = std::max(0, j - reach);
        const int right = std::min(cols - 1, j + reach) + 1;  // exclusive
        res[i][j] = static_cast<int>(prefix[bottom][right] - prefix[top][right] -
                                     prefix[bottom][left] + prefix[top][left]);
      }
    }
    return res;
  }
};
// cLay version 20200112-1
// --- original code ---
// #define main dummy_main
// {}
// #undef main
//
// int x, y;
// Grid2d<int> g;
//
// class Solution {
// public:
// vector<vector<int>> matrixBlockSum(vector<vector<int>>& mat, int K) {
// vector<vector<int>> res;
// vector<int> tmp;
// dummy_main();
// x = mat.size();
// y = mat[0].size();
// g.malloc(x,y);
// rep(i,x) rep(j,y) g[i][j] = mat[i][j];
// g.setSum();
//
// rep(y) tmp.push_back(0);
// rep(x) res.push_back(tmp);
//
// rep(i,x) rep(j,y) res[i][j] = g.getSum(max(0,i-K), max(0,j-K), min(x-1,i+K), min(y-1,j+K));
//
// g.free();
// return res;
// }
// };
#pragma GCC optimize ("Ofast")
#include<bits/stdc++.h>
using namespace std;
// Cursor into the working-memory arena: walloc1d() hands out storage starting
// at this address (by default) and then moves it past the storage it handed out.
void *wmem;
// Static 96,000,000-byte buffer; dummy_main() points `wmem` at its start.
char memarr[96000000];
/// Returns the smaller of `a` and `b`, expressed in the first argument's type `S`.
/// When the two compare equal (`a <= b`), `a` is chosen.
template <class S, class T>
inline S min_L(S a, T b) {
  const bool first_wins = (a <= b);
  // A conditional expression is used on purpose: both operands are brought to
  // their common type first and only then converted to S.
  return first_wins ? a : b;
}
/// Returns the larger of `a` and `b`, expressed in the first argument's type `S`.
/// When the two compare equal (`a >= b`), `a` is chosen.
template <class S, class T>
inline S max_L(S a, T b) {
  const bool first_wins = (a >= b);
  // A conditional expression is used on purpose: both operands are brought to
  // their common type first and only then converted to S.
  return first_wins ? a : b;
}
/// Carves an array of `x` elements of type T out of a bump-allocated arena.
///
/// @param arr  receives the address of the first element.
/// @param x    number of elements to reserve.
/// @param mem  arena cursor; it is first rounded up to a 16-byte boundary and
///             afterwards left pointing just past the reserved elements.
///             Defaults to the file-level cursor `wmem`.
///
/// No bounds check against the arena's capacity is performed.
template <class T>
inline void walloc1d(T **arr, int x, void **mem = &wmem) {
  // Bytes needed to reach the next multiple of 16 (0 when already aligned).
  const unsigned long long address = reinterpret_cast<unsigned long long>(*mem);
  const unsigned long long padding = (16 - (address & 15)) & 15;

  char *aligned = static_cast<char*>(*mem) + padding;
  *arr = static_cast<T*>(static_cast<void*>(aligned));
  *mem = (*arr) + x;
}
/// Allocates an x-by-y matrix of T as one contiguous block plus a table of
/// row pointers, so that `(*arr)[i][j]` addresses row i, column j.
///
/// @param arr  receives the row-pointer table; release it with free2d()
///             (i.e. free((*arr)[0]) followed by free(*arr)).
/// @param x    number of rows; values <= 0 are treated as 0.
/// @param y    number of columns; values <= 0 are treated as 0.
///
/// On allocation failure `*arr` is set to a null pointer.
/// Elements are left uninitialised.
template <class T>
void malloc2d(T ***arr, int x, int y) {
  // Do the size arithmetic in size_t: `x * y` as int can overflow for large grids.
  const std::size_t rows = x > 0 ? static_cast<std::size_t>(x) : 0;
  const std::size_t cols = y > 0 ? static_cast<std::size_t>(y) : 0;

  // Always reserve at least one row-pointer slot: slot 0 holds the data block
  // even for a zero-row matrix, so writing it must never be out of bounds.
  const std::size_t slots = rows > 0 ? rows : 1;
  T **table = static_cast<T**>(std::malloc(slots * sizeof(T*)));
  if (table == nullptr) {
    *arr = nullptr;
    return;
  }

  T *block = static_cast<T*>(std::malloc(rows * cols * sizeof(T)));
  if (block == nullptr && rows * cols > 0) {
    std::free(table);
    *arr = nullptr;
    return;
  }

  table[0] = block;
  for (std::size_t i = 1; i < rows; ++i) {
    table[i] = table[i - 1] + cols;
  }
  *arr = table;
}
/// Releases a matrix laid out as a row-pointer table whose slot 0 owns the
/// contiguous data block (the layout malloc2d() produces).
///
/// @param arr  row-pointer table; must be non-null because `arr[0]` is read.
template <class T>
void free2d(T **arr) {
  T *const block = arr[0];  // the data block hangs off the first row pointer
  std::free(block);
  std::free(arr);
}
/// Indexed binary min-heap over node ids 0..N-1, keyed by `val[node]`.
///
/// A node's key can be lowered while it is queued (`change`), and a node that
/// has been popped is marked visited and ignores all later `change` calls.
/// Storage comes either from the C heap (`malloc`/`free`) or from the bump
/// arena (`walloc`); `init` must be called before first use.
template <class T>
struct DijkstraHeap {
  int *hp;        // heap array of node ids; hp[0] has the smallest key
  int *place;     // place[node] = index of node inside hp, or -1 when not queued
  int size;       // number of ids currently stored in hp
  char *visited;  // visited[node] becomes 1 once the node has been popped
  T *val;         // key of each node

  /// Allocates the four arrays for N nodes on the C heap; pair with free().
  void malloc(int N) {
    hp = static_cast<int*>(std::malloc(N * sizeof(int)));
    place = static_cast<int*>(std::malloc(N * sizeof(int)));
    visited = static_cast<char*>(std::malloc(N * sizeof(char)));
    val = static_cast<T*>(std::malloc(N * sizeof(T)));
  }

  /// Releases arrays obtained through malloc().
  void free() {
    std::free(hp);
    std::free(place);
    std::free(visited);
    std::free(val);
  }

  /// Takes the four arrays for N nodes from the arena addressed by `mem`.
  void walloc(int N, void **mem = &wmem) {
    walloc1d(&hp, N, mem);
    walloc1d(&place, N, mem);
    walloc1d(&visited, N, mem);
    walloc1d(&val, N, mem);
  }

  /// Empties the heap and marks all N nodes as unqueued and unvisited.
  void init(int N) {
    size = 0;
    for (int node = 0; node < N; ++node) {
      place[node] = -1;
      visited[node] = 0;
    }
  }

  /// Sifts the entry at heap index n towards the root while its parent's key is larger.
  void up(int n) {
    while (n != 0) {
      const int parent = (n - 1) / 2;
      if (val[hp[parent]] <= val[hp[n]]) {
        return;
      }
      std::swap(hp[parent], hp[n]);
      // hp was swapped first, so these now name the two moved nodes.
      std::swap(place[hp[parent]], place[hp[n]]);
      n = parent;
    }
  }

  /// Sifts the entry at heap index n towards the leaves while a child's key is smaller.
  void down(int n) {
    for (int child = 2 * n + 1; child < size; child = 2 * n + 1) {
      // Pick the smaller-keyed of the two children.
      if (child + 1 < size && val[hp[child]] > val[hp[child + 1]]) {
        ++child;
      }
      if (val[hp[child]] >= val[hp[n]]) {
        return;
      }
      std::swap(hp[child], hp[n]);
      std::swap(place[hp[child]], place[hp[n]]);
      n = child;
    }
  }

  /// Offers key v for node n: inserts the node if it is not queued, or lowers
  /// its key if v is strictly smaller. Does nothing for visited nodes or when
  /// the queued key is already <= v.
  void change(int n, T v) {
    if (visited[n]) {
      return;
    }
    const bool queued = place[n] >= 0;
    if (queued && val[n] <= v) {
      return;
    }
    val[n] = v;
    if (place[n] == -1) {
      place[n] = size;
      hp[size] = n;
      ++size;
    }
    up(place[n]);
  }

  /// Removes the node with the smallest key, marks it visited and returns its id.
  /// The heap must not be empty.
  int pop(void) {
    const int top = hp[0];
    place[top] = -1;
    --size;
    if (size != 0) {
      // Move the last entry to the root and restore heap order.
      hp[0] = hp[size];
      place[hp[0]] = 0;
      down(0);
    }
    visited[top] = 1;
    return top;
  }
};
/// Dense r-by-c grid of T with optional derived tables:
///  - `d_s`: 2-D prefix sums (setSum / getSum),
///  - `up`, `dw`, `lf`, `rg`: run lengths in the four directions
///    (setDir / setDirMatch),
///  - weighted 4-neighbour shortest paths (getDist4).
///
/// Storage is managed manually: call malloc() before use and free() afterwards.
template <class T>
struct Grid2d {
  int r;      // number of rows
  int c;      // number of columns
  T **d;      // cell values, d[row][col]
  int set_s;  // non-zero once d_s has been allocated
  int set_d;  // non-zero once up/dw/lf/rg have been allocated
  T **d_s;    // (r+1) x (c+1) prefix sums; d_s[i][j] = sum of d[0..i-1][0..j-1]
  int **up;   // run length ending at the cell, extending towards row 0
  int **dw;   // run length starting at the cell, extending towards row r-1
  int **lf;   // run length ending at the cell, extending towards column 0
  int **rg;   // run length starting at the cell, extending towards column c-1

  /// Sets the dimensions to rr x cc and allocates the (uninitialised) cell storage.
  void malloc(const int rr, const int cc) {
    r = rr;
    c = cc;
    set_s = 0;
    set_d = 0;
    malloc2d(&d, r, c);
  }

  /// Releases the cell storage and whichever derived tables were allocated.
  /// The allocation flags are cleared so stale tables are never reused or
  /// released a second time.
  void free(void) {
    free2d(d);
    if (set_s) {
      free2d(d_s);
      set_s = 0;
    }
    if (set_d) {
      free2d(up);
      free2d(dw);
      free2d(lf);
      free2d(rg);
      set_d = 0;
    }
  }

  /// Row access: `grid[a][b]` is the cell in row a, column b.
  T* operator[](int a) {
    return d[a];
  }

  /// (Re)builds the prefix-sum table from the current cell values.
  void setSum(void) {
    if (set_s == 0) {
      set_s = 1;
      malloc2d(&d_s, r + 1, c + 1);
    }
    for (int i = 0; i <= r; ++i) {
      d_s[i][0] = 0;
    }
    for (int j = 0; j <= c; ++j) {
      d_s[0][j] = 0;
    }
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        d_s[i + 1][j + 1] = d_s[i][j + 1] + d_s[i + 1][j] - d_s[i][j] + d[i][j];
      }
    }
  }

  /// Allocates the four direction tables on first use.
  void ensureDirTables(void) {
    if (set_d == 0) {
      set_d = 1;
      malloc2d(&up, r, c);
      malloc2d(&dw, r, c);
      malloc2d(&lf, r, c);
      malloc2d(&rg, r, c);
    }
  }

  /// Fills up/dw/lf/rg with the length of the run of equal values that
  /// contains each cell and extends in the given direction (the cell itself
  /// counts, so every entry is >= 1).
  ///
  /// Border cells are handled inside the loops, so nothing outside the
  /// r x c grid is indexed even when r or c is 0.
  void setDir(void) {
    ensureDirTables();
    // Forward sweep: up and lf depend on the previous row / column.
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        up[i][j] = (i > 0 && d[i][j] == d[i - 1][j]) ? 1 + up[i - 1][j] : 1;
        lf[i][j] = (j > 0 && d[i][j] == d[i][j - 1]) ? 1 + lf[i][j - 1] : 1;
      }
    }
    // Backward sweep: dw and rg depend on the next row / column.
    for (int i = r - 1; i >= 0; --i) {
      for (int j = c - 1; j >= 0; --j) {
        dw[i][j] = (i + 1 < r && d[i][j] == d[i + 1][j]) ? 1 + dw[i + 1][j] : 1;
        rg[i][j] = (j + 1 < c && d[i][j] == d[i][j + 1]) ? 1 + rg[i][j + 1] : 1;
      }
    }
  }

  /// Fills up/dw/lf/rg with the number of consecutive cells equal to `v`
  /// starting at each cell and extending in the given direction; cells that
  /// differ from `v` get 0.
  ///
  /// Border cells are handled inside the loops, so nothing outside the
  /// r x c grid is indexed even when r or c is 0.
  void setDirMatch(const T v) {
    ensureDirTables();
    // Forward sweep: up and lf depend on the previous row / column.
    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        if (d[i][j] == v) {
          up[i][j] = 1 + (i > 0 ? up[i - 1][j] : 0);
          lf[i][j] = 1 + (j > 0 ? lf[i][j - 1] : 0);
        } else {
          up[i][j] = 0;
          lf[i][j] = 0;
        }
      }
    }
    // Backward sweep: dw and rg depend on the next row / column.
    for (int i = r - 1; i >= 0; --i) {
      for (int j = c - 1; j >= 0; --j) {
        if (d[i][j] == v) {
          dw[i][j] = 1 + (i + 1 < r ? dw[i + 1][j] : 0);
          rg[i][j] = 1 + (j + 1 < c ? rg[i][j + 1] : 0);
        } else {
          dw[i][j] = 0;
          rg[i][j] = 0;
        }
      }
    }
  }

  /// Sum of the cells in rows r1..r2 and columns c1..c2 (all bounds inclusive).
  /// Requires a prior setSum(); the bounds are not range-checked.
  inline T getSum(const int r1, const int c1, const int r2, const int c2) {
    return d_s[r2 + 1][c2 + 1] - d_s[r1][c2 + 1] - d_s[r2 + 1][c1] + d_s[r1][c1];
  }

  /// Cheapest path cost from (sr, sc) to every cell using 4-neighbour moves.
  ///
  /// Entering a cell costs its value `d[cell]`; the start cell's own value is
  /// included. Cells with a negative value cannot be entered.
  ///
  /// @param sr, sc  start cell.
  /// @param res     r x c output table; receives the cost, or -1 for cells that
  ///                cannot be reached.
  /// @param mem     start of scratch space for the heap. It is a by-value copy
  ///                of the arena cursor, so the heap is carved from `mem` and
  ///                the caller's cursor is not advanced; the scratch space is
  ///                reusable as soon as the call returns.
  template <class S>
  inline void getDist4(int sr, int sc, S **res, void *mem = wmem) {
    // Neighbour offsets in the order: up, down, left, right.
    const int dr[4] = {-1, 1, 0, 0};
    const int dc[4] = {0, 0, -1, 1};

    DijkstraHeap<S> hp;
    // Allocate from the local copy `mem`; using the default here would advance
    // the global cursor permanently on every call.
    hp.walloc(r * c, &mem);
    hp.init(r * c);

    if (d[sr][sc] >= 0) {
      hp.change(sr * c + sc, d[sr][sc]);
    }
    while (hp.size) {
      const int k = hp.pop();
      const int i = k / c;
      const int j = k % c;
      for (int t = 0; t < 4; ++t) {
        const int ni = i + dr[t];
        const int nj = j + dc[t];
        if (ni < 0 || ni >= r || nj < 0 || nj >= c) {
          continue;
        }
        if (!(d[ni][nj] >= 0)) {
          continue;  // blocked cell
        }
        hp.change(ni * c + nj, hp.val[k] + d[ni][nj]);
      }
    }

    for (int i = 0; i < r; ++i) {
      for (int j = 0; j < c; ++j) {
        const int k = i * c + j;
        if (hp.visited[k]) {
          res[i][j] = hp.val[k];
        } else {
          res[i][j] = -1;
        }
      }
    }
  }
};
// Arena set-up routine (the cLay source's `main`, emitted under the name
// dummy_main): rewinds the arena cursor `wmem` to the start of `memarr`.
// Always returns 0.
int dummy_main() {
  wmem = memarr;
  return 0;
}
// File-scope working variables declared by the cLay source (`int x, y;`).
// NOTE(review): intended as the row count (x) and column count (y) of the
// matrix being processed — confirm against Solution::matrixBlockSum.
int x;
int y;
// File-scope working grid declared by the cLay source (`Grid2d<int> g;`).
Grid2d<int> g;
/// LeetCode-style entry point for "Matrix Block Sum".
class Solution {
 public:
  /// For every cell (i, j) returns the sum of all mat[r][c] with
  /// i-K <= r <= i+K and j-K <= c <= j+K that lie inside the matrix.
  ///
  /// @param mat  rectangular matrix (every row as long as row 0); not modified.
  /// @param K    block radius. A negative K describes an empty block, so every
  ///             sum is 0.
  /// @return     matrix of the same shape as `mat` holding the block sums;
  ///             empty when `mat` is empty.
  ///
  /// Works on local state only (2-D prefix sums held in std::vector), so
  /// storage is released automatically on every exit path and the method is
  /// re-entrant.
  std::vector<std::vector<int>> matrixBlockSum(std::vector<std::vector<int>>& mat, int K) {
    const int rows = static_cast<int>(mat.size());
    // Guard the mat[0] access: an empty matrix has no first row.
    const int cols = rows > 0 ? static_cast<int>(mat[0].size()) : 0;

    std::vector<std::vector<int>> res(rows, std::vector<int>(cols, 0));
    if (rows == 0 || cols == 0 || K < 0) {
      return res;
    }

    // prefix[i][j] = sum of mat[0..i-1][0..j-1]; row 0 and column 0 stay 0.
    std::vector<std::vector<long long>> prefix(rows + 1, std::vector<long long>(cols + 1, 0));
    for (int i = 0; i < rows; ++i) {
      for (int j = 0; j < cols; ++j) {
        prefix[i + 1][j + 1] = prefix[i][j + 1] + prefix[i + 1][j] - prefix[i][j] + mat[i][j];
      }
    }

    // A radius beyond the larger dimension already covers the whole matrix;
    // capping it keeps i + reach from overflowing for huge K.
    const int reach = std::min(K, std::max(rows, cols));

    for (int i = 0; i < rows; ++i) {
      const int top = std::max(0, i - reach);
      const int bottom = std::min(rows - 1, i + reach) + 1;  // exclusive
      for (int j = 0; j < cols; ++j) {
        const int left = std::max(0, j - reach);
        const int right = std::min(cols - 1, j + reach) + 1;  // exclusive
        res[i][j] = static_cast<int>(prefix[bottom][right] - prefix[top][right] -
                                     prefix[bottom][left] + prefix[top][left]);
      }
    }
    return res;
  }
};
// cLay version 20200112-1

// --- original code ---
// #define main dummy_main
// {}
// #undef main
// 
// int x, y;
// Grid2d<int> g;
// 
// class Solution {
// public:
//   vector<vector<int>> matrixBlockSum(vector<vector<int>>& mat, int K) {
//     vector<vector<int>> res;
//     vector<int> tmp;
//     dummy_main();
//     x = mat.size();
//     y = mat[0].size();
//     g.malloc(x,y);
//     rep(i,x) rep(j,y) g[i][j] = mat[i][j];
//     g.setSum();
// 
//     rep(y) tmp.push_back(0);
//     rep(x) res.push_back(tmp);
// 
//     rep(i,x) rep(j,y) res[i][j] = g.getSum(max(0,i-K), max(0,j-K), min(x-1,i+K), min(y-1,j+K));
// 
//     g.free();
//     return res;
//   }
// };
