#define x first #define y second #define y1 Y1 #define y2 Y2 #define mp make_pair #define pb push_back
usingnamespacestd;
typedeflonglong LL; typedef pair<int, int> pii;
template <typename T> inlineintChkmax(T &a, T b){ return a < b ? a = b, 1 : 0; } template <typename T> inlineintChkmin(T &a, T b){ return a > b ? a = b, 1 : 0; }
inlinevoidAdd(int &a, int b){ a += b; if (a >= Mod) a -= Mod; }
inlineintPow(int a, int b) { int ans = 1; for (; b; a = 1ll * a * a % Mod, b >>= 1) if (b & 1) ans = 1ll * ans * a % Mod; return ans; }
inlineintC(int n, int m){return1ll * fac[n] * ifac[m] % Mod * ifac[n - m] % Mod; }
structBIT { #define lowbit(x) (x & (-x)) int sum[Maxn]; inlinevoidadd(int x, int val){ for (; x <= N; x += lowbit(x)) Add (sum[x], val); } inlineintquery(int x){ int ans = 0; for (; x; x -= lowbit(x)) Add (ans, sum[x]); return ans; } } T[3];
structMatrix { int A[Maxk][Maxk];
inlinevoidinit() { memset(A, 0, sizeof A); for (int i = 0; i <= 6; ++i) A[i][i] = 1; }
inlinevoid _init () { for (int i = 0; i <= 6; ++i) A[i][i] = C(N - 2, 2); A[0][1] = A[0][5] = N - 2, A[0][2] = 1; A[1][0] = A[1][3] = A[1][4] = 1, Add (A[1][1], N - 3), A[1][6] = N - 3; A[2][0] = 1, A[2][3] = A[2][4] = N - 2; A[3][1] = A[3][2] = A[3][5] = 1, Add (A[3][3], N - 3), A[3][6] = N - 3; A[4][1] = A[4][2] = A[4][5] = 1, Add (A[4][4], N - 3), A[4][6] = N - 3; A[5][0] = A[5][3] = A[5][4] = 1, Add (A[5][5], N - 3), A[5][6] = N - 3; A[6][1] = A[6][3] = A[6][4] = A[6][5] = 1, Add (A[6][6], 2ll * (N - 4) % Mod + 1); }
inline Matrix operator * (const Matrix &rhs) const { Matrix c; for (int i = 0; i <= 6; ++i) for (int j = 0; j <= 6; ++j) { c.A[i][j] = 0; for (int k = 0; k <= 6; ++k) Add (c.A[i][j], 1ll * A[i][k] * rhs.A[k][j] % Mod); } return c; }
} trans, sum;
inlinevoidInit() { fac[0] = 1; for (int i = 1; i <= N; ++i) fac[i] = 1ll * fac[i - 1] * i % Mod; ifac[N] = Pow(fac[N], Mod - 2); for (int i = N - 1; i >= 0; --i) ifac[i] = 1ll * ifac[i + 1] * (i + 1) % Mod; sum.init(), trans._init(); }
int Ans[Maxk];
inlinevoidSolve() { Init(); for (int i = K; i; trans = trans * trans, i >>= 1) { if (i & 1) sum = sum * trans; } for (int i = 0; i <= 6; ++i) Ans[i] = sum.A[0][i];
int f_sum = 0, g_sum = 0, ans = 0; int inv = Pow(N - 2, Mod - 2); for (int i = 1; i <= N; ++i) { int num_a = T[0].query (A[i]), f_a = T[1].query (A[i]), g_a = T[2].query (A[i]); int num_b = i - 1 - num_a, f_b = f_sum - f_a, g_b = g_sum - g_a;
Add (ans, 1ll * num_b * Ans[0] % Mod); Add (ans, 1ll * (f_a + g_b) % Mod * Ans[1] % Mod * inv % Mod); Add (ans, 1ll * num_a * Ans[2] % Mod); Add (ans, 1ll * (f_b + g_a) % Mod * Ans[3] % Mod * inv % Mod); Add (ans, 1ll * (1ll * num_a * (i - 2) % Mod + 1ll * num_b * (N - i) % Mod) % Mod * Ans[4] % Mod * inv % Mod); Add (ans, 1ll * (1ll * num_b * (i - 2) % Mod + 1ll * num_a * (N - i) % Mod) % Mod * Ans[5] % Mod * inv % Mod);
T[0].add (A[i], 1), T[1].add (A[i], i - 1), T[2].add (A[i], N - i - 1); Add (f_sum, i - 1), Add (g_sum, N - i - 1); } Add (ans, 1ll * C(N, 2) * Pow(2, Mod - 2) % Mod * Ans[6] % Mod); cout<<ans<<endl; }
inlinevoidInput() { N = read<int>(), K = read<int>(); for (int i = 1; i <= N; ++i) A[i] = read<int>(); }