#include <bits/stdc++.h>

using namespace std;
typedef long long ll;

/**
 * Rebalances `values` so that the remainder classes modulo `m` are evened out,
 * using only "+1" steps applied to individual elements.
 *
 * Each class is allowed `values.size() / m` elements (the quota). Elements in
 * an over-full class are pushed forward (cyclically) to the nearest later
 * class that still has room; moving an element forward by d classes adds d to
 * its value and costs d steps. When `m` divides the element count, every
 * class ends with exactly the quota. Otherwise, surplus elements that find no
 * free slot are left with their original value.
 *
 * @param values  elements to adjust; modified in place.
 * @param m       modulus; must be positive.
 * @return        total number of +1 steps applied.
 */
static ll equalize_remainders(std::vector<ll>& values, ll m)
{
    const ll quota = static_cast<ll>(values.size()) / m;

    // members[r] holds the positions in `values` whose remainder is r.
    std::vector<std::vector<std::size_t> > members(static_cast<std::size_t>(m));
    for (std::size_t idx = 0; idx < values.size(); ++idx) {
        // Normalise so that negative inputs still map into [0, m).
        const ll rem = ((values[idx] % m) + m) % m;
        members[static_cast<std::size_t>(rem)].push_back(idx);
    }

    // Pending surplus: (element position, virtual class position it left).
    std::vector<std::pair<std::size_t, ll> > surplus;
    ll total_steps = 0;

    // Sweep the classes twice (virtual positions 0 .. 2m-1). Surplus has to
    // survive across several consecutive full classes and across the wrap
    // from class m-1 back to class 0; a single pass that only looks at the
    // immediate neighbour leaves such inputs unbalanced. Using virtual
    // positions makes `pos - origin` the exact number of +1 steps needed.
    for (ll pos = 0; pos < 2 * m; ++pos) {
        std::vector<std::size_t>& bucket = members[static_cast<std::size_t>(pos % m)];

        while (static_cast<ll>(bucket.size()) > quota) {
            surplus.push_back(std::make_pair(bucket.back(), pos));
            bucket.pop_back();
        }

        while (static_cast<ll>(bucket.size()) < quota && !surplus.empty()) {
            const std::size_t idx = surplus.back().first;
            const ll origin = surplus.back().second;
            surplus.pop_back();

            const ll shift = pos - origin;
            values[idx] += shift;
            total_steps += shift;
            bucket.push_back(idx);
        }
    }

    return total_steps;
}

/**
 * Reads `n m` followed by `n` integers from standard input, evens out the
 * remainder classes modulo `m`, then prints the number of +1 steps on the
 * first line and the adjusted values (in input order, each followed by a
 * space) on the second.
 *
 * Returns 1 without printing anything if the input cannot be read or if
 * `n < 0` or `m <= 0` (which would otherwise divide by zero).
 */
int main()
{
    std::ios_base::sync_with_stdio(false);
    std::cin.tie(nullptr);

    ll n = 0;
    ll m = 0;
    if (!(std::cin >> n >> m) || n < 0 || m <= 0)
        return 1;

    std::vector<ll> values(static_cast<std::size_t>(n));
    for (std::size_t i = 0; i < values.size(); ++i) {
        if (!(std::cin >> values[i]))
            return 1;
    }

    const ll res = equalize_remainders(values, m);

    std::cout << res << "\n";
    for (std::size_t i = 0; i < values.size(); ++i)
        std::cout << values[i] << " ";
    return 0;
}