// test1.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; struct serie { vector vec; double label; double len; }; double INF = 1e10; serie series[10000]; double dpA[2000][2000]; double dpB[2000][2000]; int num = 637; double coe=0.2; int WIN = 0.2; double down_ratio; double DPA(int i, int j, int win) { if ((i == 0 && j == win + 1) || (j == 0 && i == win + 1)) { return dpA[i][j];; } if (abs(i - j) <= win) return dpA[i][j]; return INF; } double DPB(int i, int j, int win) { if ((i == 0 && j == win + 1) || (j == 0 && i == win + 1)) { return dpB[i][j];; } if (abs(i - j) <= win) return dpB[i][j]; return INF; } double getdis(int one, int two, int i, int j) { vector& vec1 = series[one].vec; vector& vec2 = series[two].vec; int n = vec1.size(); double ans = (vec1[i * 2] - vec2[j * 2])*(vec1[i * 2] - vec2[j * 2]) + (vec1[i * 2 + 1] - vec2[j * 2 + 1])*(vec1[i * 2 + 1] - vec2[j * 2 + 1]); return sqrt(ans); } double getdistance00(int one, int two) { int n = series[two].vec.size() + 1; dpA[0][0] = 0; dpB[0][0] = 0; dpA[1][0] = 0; dpB[0][1] = 0; n /= 2; for (int i = 2; i <= min(n - 1, WIN + 1); i++) { dpA[i][0] = dpA[i - 1][0] + getdis(one, one, i - 1, i - 1 - 1);// fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]); dpB[0][i] = dpB[0][i - 1] + getdis(two, two, i - 1, i - 1 - 1);// fabs(series[two].vec[i - 1] - series[two].vec[i - 1 - 1]);getdis(two, two, i - 1, j - 1-1) } int win = WIN; for (int i = 1; i < n; i++) for (int j = max(1, i - win); j < min(n, i + win + 1); j++) { if (i != 1) dpA[i][j] = min(DPA(i - 1, j, win) + getdis(one, one, i - 1, i - 1 - 1), DPB(i - 1, j, win) + coe*getdis(one, two, i - 1, j - 1)); else dpA[i][j] = DPB(i - 1, j, win) + coe*getdis(one, two, i - 1, j - 1); if (j != 1) dpB[i][j] = min(DPA(i, j - 1, win) + coe*getdis(one, two, i - 1, j - 1), DPB(i, j - 1, win) + getdis(two, two, i - 1, j - 1 - 1)); else dpB[i][j] = DPA(i, j - 1, win) + coe*getdis(one, two, i - 1, j - 1); } return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]); } double getdistance0(int one, int two) { int n = series[two].vec.size() + 1; dpA[0][0] = 0; dpB[0][0] = 0; dpA[1][0] = 0; dpB[0][1] = 0; for (int i = 2; i <= min(n-1,WIN + 1); i++) { dpA[i][0] = dpA[i - 1][0] + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]); dpB[0][i] = dpB[0][i - 1] + fabs(series[two].vec[i - 1] - series[two].vec[i - 1 - 1]); } int win = WIN; for (int i = 1; i < n; i++) for (int j = max(1, i - win); j < min(n, i + win + 1); j++) { if (i != 1) dpA[i][j] = min(DPA(i - 1, j, win) + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]), DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1])); else dpA[i][j] = DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]); if (j != 1) dpB[i][j] = min(DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]), DPB(i, j - 1, win) + fabs(series[two].vec[j - 1] - series[two].vec[j - 1 - 1])); else dpB[i][j] = DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]); } return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]) ; } double dp[2000][2000]; double getdistance1(int one, int two) { int n = series[two].vec.size(); dp[0][0] = fabs(series[one].vec[0] - series[two].vec[0]); int win = WIN; for (int i = 1; i < min(i+win,n); i++) { dp[i][0] = dp[i-1][0]+fabs(series[one].vec[i]-series[two].vec[0]); dp[0][i] = dp[0][i-1]+fabs(series[one].vec[0] - series[two].vec[i]); } for (int i = 1; i < n; i++) for (int j = max(1,i-win); j < min(i+win+1,n); j++) { if (j==i-win) dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) + min(dp[i - 1][j], dp[i - 1][j - 1]); else if (j==i+win) dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) + min(dp[i][j - 1], dp[i - 1][j - 1]); else dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) +min(dp[i - 1][j], min(dp[i][j - 1], dp[i - 1][j - 1])); } return dp[n - 1][n - 1]; return 0; } double getdistance2(int one, int two) { int n = series[two].vec.size(); double ans = 0; for (int i = 0; i < series[one].vec.size(); i++) { ans += fabs(series[one].vec[i] - series[two].vec[i]); } return ans; } double getdistance3(int one, int two) { int n = series[one].vec.size(); double ans = 0; for (int i = 0; i < n - 1; i++) { ans += abs(series[one].vec[i] - series[two].vec[i]); ans += abs(series[two].vec[i] - series[one].vec[i + 1]); ans += abs(series[one].vec[i] - series[two].vec[i + 1]); } ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]); return ans; } double getdistance4(int one, int two) { int n = series[two].vec.size(); double ans = 0; dp[0][0] = 0; for (int i = 1; i < n; i++) { dp[i][0] = dp[i - 1][0] + abs(series[one].vec[i - 1] - series[one].vec[i]); dp[0][i] = dp[0][i - 1] + abs(series[two].vec[i - 1] - series[two].vec[i]); } for (int i = 1; i < n; i++) for (int j = 1; j < n; j++) { dp[i][j] = dp[i - 1][j] + abs(series[one].vec[i - 1] - series[one].vec[i]); dp[i][j] = min(dp[i][j], dp[i][j - 1] + abs(series[two].vec[j - 1] - series[two].vec[j])); dp[i][j] = min(dp[i][j], dp[i - 1][j - 1] + abs(series[one].vec[i] - series[two].vec[j])); dp[i][j] = min(dp[i][j], dp[i - 1][j - 1] + abs(series[one].vec[i] - series[two].vec[j])); } return dp[n - 1][n - 1]; } double getdistance5(int one, int two) { int n = series[one].vec.size(); double ans = 0; ans += abs(series[one].vec[0] - series[two].vec[0]); ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]); for (int i = 1; i < n - 1; i++) { ans += min(abs(series[one].vec[i] - series[two].vec[i - 1]), min(abs(series[one].vec[i] - series[two].vec[i]), abs(series[one].vec[i] - series[two].vec[i + 1]))); } ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]); return ans; } map names; vector lens(38, 0); vector num1(38, 0); vector num2(38, 0); vector classes(38,0); vector dev_mean(38, 0); void readInfo() { ifstream file("C:/Users/xiefubao/Desktop/experiment/vldb_dataset/numhelp.txt"); if (!file.is_open()) { cout << "num.txt not open!" << endl; exit(0); } for (int i = 0; i < 38; i++) { string now; int no; file >> no; file >> now; file >> lens[i]; file >> num1[i]; file >> num2[i]; file >> classes[i]; file >> dev_mean[i]; names[i] = now; } } void down_sample(int count) { int newnum = num*(1 - down_ratio); int getout = num - newnum; for (int i = 0; i < count; i++) { vector down; vector help(num, 0); vector rem(num, 1); for (int j = 0; j < num; j++) { help[j] = j; } for (int j = 0; j < getout; j++) { int position = rand() % (num - j); rem[help[position]] = 0; swap(help[position], help[num-j-1]); } vector now(newnum, 0); int add = 0; for (int j = 0; j < num; j++) { if (rem[j]) now[add++] = series[i].vec[j]; } swap(series[i].vec, now); } num = newnum; } void readFile(int filenum,int& count,bool train) { num = lens[filenum - 1]; string filepath = "C:/Users/xiefubao/Desktop/experiment/vldb_dataset/" + to_string(filenum) + "/" + names[filenum - 1] + (train ? "_TRAIN" : "_TEST") + ".txt"; cout << filepath << endl; double now; ifstream in(filepath); if (!in.is_open()) { cout << "not open" << endl; exit(1); } while (in >> now) { series[count].label = now; double len = 0; series[count].vec.clear(); for (int i = 0; i < num; i++) { double point; in >> point; point = point; series[count].vec.push_back(point); if (i != 0) len += fabs(point - series[count].vec[i - 1]); } series[count].len = len; count++; } //down_sample(count); } int _tmain(int argc, _TCHAR* argv[]) { //cout << "xie" << << 123 << "123" << endl; getchar(); readInfo(); ofstream result; result.open("C:/Users/xiefubao/Desktop/experiment/vldb_dataset/result.txt"); //file location //int filenum = 31; bool train = false; for (int filenum = 1; filenum <= 38; filenum++){ down_ratio = 0.2; WIN = 5; coe = dev_mean[filenum - 1] / 2.0; //coe = 3; clock_t start, finish; start = clock(); vector hitnum(5, 0); int count = 0; cout << "reading" << endl; readFile(filenum, count, train); cout << "have read" << endl; double(*distance[5]) (int, int) = { getdistance00, getdistance0, getdistance1, getdistance2, getdistance3 }; vector computeDis; //computeDis.push_back(0); computeDis.push_back(1); computeDis.push_back(2); computeDis.push_back(3); //computeDis.push_back(4); for (int i = 0; i < count; i++) { /*double mist = INF; int bestnum = -1;*/ vector mist(5, INF); vector bestnum(5, -1); //cout << i << endl; for (int j = 0; j < count; j++) { if (i == j) continue; if (rand() %(num1[filenum]/classes[filenum]/5) != 0) continue; vector dist(5, 0); for (int k = 0; k < computeDis.size(); k++) { dist[computeDis[k]] = distance[computeDis[k]](i, j); } for (int k = 0; k < computeDis.size(); k++) if (dist[computeDis[k]] < mist[computeDis[k]]) { mist[computeDis[k]] = dist[computeDis[k]]; bestnum[computeDis[k]] = j; } } if (i == count - 1) { result << setw(2) << setfill(' ')<