From d6398e1001b77b4a8324be460920dbdbad49ec39 Mon Sep 17 00:00:00 2001 From: Orpaz Sondhelm Date: Thu, 11 Nov 2021 01:46:05 -0800 Subject: [PATCH] Y & O --- MainTrain.cpp | 17 +++++++- anomaly_detection_util.cpp | 89 +++++++++++++++++++++++++++----------- anomaly_detection_util.h | 18 ++++---- timeseries.h | 58 ++++++++++++++----------- 4 files changed, 120 insertions(+), 62 deletions(-) diff --git a/MainTrain.cpp b/MainTrain.cpp index 1572996..40bb524 100644 --- a/MainTrain.cpp +++ b/MainTrain.cpp @@ -10,7 +10,7 @@ #include using namespace std; - +/* // this is a simple test to put you on the right track void generateTrainCSV(float a1,float b1, float a2, float b2){ ofstream out("trainFile1.csv"); @@ -108,4 +108,17 @@ int main(){ cout<<"done"< -#include "anomaly_detection_util.h" float avg(float* x, int size){ - return 0; + float mu = 0; + for (int i = 0; i < size; ++i) { + mu = mu + x[i]; + } + return mu / size; } -// returns the variance of X and Y float var(float* x, int size){ - return 0; + float sum = 0, result = 0; + float m = pow(avg(x, size), 2); + for (int i = 0; i < size; ++i) { + sum = sum + pow(x[i], 2); + } + result = (sum / size) - m ; + return result; } -// returns the covariance of X and Y float cov(float* x, float* y, int size){ - return 0; + float sum = 0; + float muX = avg(x, size); + float muY = avg(y, size); + for (int i = 0; i < size; ++i) { + sum = sum + ((x[i] - muX) * (y[i] - muY)); + } + sum = sum / size; + return sum; } - -// returns the Pearson correlation coefficient of X and Y float pearson(float* x, float* y, int size){ - return 0; + float numerator = cov(x, y, size); + float denominator = sqrt(var(x, size)) * sqrt(var(y, size)); + return numerator / denominator; } -// performs a linear regression and returns the line equation -Line linear_reg(Point** points, int size){ - - return Line(0,0); -} +class Line{ +public: + float a,b; + Line():a(0),b(0){} + Line(float a, float b):a(a),b(b){} + float f(float x){ + return a*x+b; + } +}; +class Point{ +public: + float x,y; + Point(float x, float y):x(x),y(y){} +}; -// returns the deviation between point p and the line equation of the points -float dev(Point p,Point** points, int size){ - return 0; +Line linear_reg(Point** points, int size) { + float x[size]; + float y[size]; + for (int i = 0; i < size; ++i) { + x[i] = points[i]->x; + y[i] = points[i]->y; + } + float a, b; + a = cov(x, y, size) / var(x, size); + float muX = avg(x, size); + float muY = avg(y, size); + b = muY - (a * muX); + Line line (a, b); + return line; } -// returns the deviation between point p and the line float dev(Point p,Line l){ - return 0; + float lineY = l.f(p.x); + float dev = lineY - p.y; + if (dev < 0) + return -dev; + return dev; } - - +float dev(Point p,Point** points, int size){ + Line line = linear_reg(points, size); + return dev(p, line); +} diff --git a/anomaly_detection_util.h b/anomaly_detection_util.h index aabe991..7b31002 100644 --- a/anomaly_detection_util.h +++ b/anomaly_detection_util.h @@ -17,18 +17,18 @@ float pearson(float* x, float* y, int size); class Line{ public: - float a,b; - Line():a(0),b(0){}; - Line(float a, float b):a(a),b(b){} - float f(float x){ - return a*x+b; - } + float a,b; + Line():a(0),b(0){} + Line(float a, float b):a(a),b(b){} + float f(float x){ + return a*x+b; + } }; class Point{ public: - float x,y; - Point(float x, float y):x(x),y(y){} + float x,y; + Point(float x, float y):x(x),y(y){} }; // performs a linear regression and returns the line equation @@ -40,4 +40,4 @@ float dev(Point p,Point** points, int size); // returns the deviation between point p and the line float dev(Point p,Line l); -#endif +#endif \ No newline at end of file diff --git a/timeseries.h b/timeseries.h index 7b2fc2c..dff1dc2 100644 --- a/timeseries.h +++ b/timeseries.h @@ -1,24 +1,30 @@ #include #include //#include +#include +#include #include #ifndef TIMESERIES_H_ #define TIMESERIES_H_ using namespace std; -class TimeSeries{ +class TimeSeries { +private: + vector>> columns; + public: - TimeSeries(const char* CSVfileName){ + TimeSeries(const char *CSVfileName) { + + string line; - ifstream myFile (CSVfileName); - vector>> columns; - int flag = 1; - if (myFile.is_open()) - { - getline (myFile,line); + + std::ifstream myFile(CSVfileName); + + if (myFile.is_open()) { + getline(myFile, line, '\r'); std::istringstream ss(line); std::string token; while (std::getline(ss, token, ',')) { @@ -26,31 +32,33 @@ class TimeSeries{ columns.push_back(make_pair(token, v1)); } - while (std::getline (myFile,line)){ - std::if ss(line); + while (std::getline(myFile, line, '\r')) { + std::istringstream ss(line); std::string token; - if (flag == 1) { - - } + int index = 0; + while (std::getline(ss, token, ',')) { + float num = std::stof(token); + columns[index].second.push_back(num); } + } - /*else{ - for(int i = 0; i < columns.size(); i++){ - v1.push_back(); - } - columns[j].second = v1; - } - }*/ - flag = 0; + /*else{ + for(int i = 0; i < columns.size(); i++){ + v1.push_back(); + } + columns[j].second = v1; } - myFile.close(); + }*/ + } + myFile.close(); } -}; - - + vector>> getVector() { + return columns; + }; +}; #endif /* TIMESERIES_H_ */