From af24dfce7d1b2f12a9e7e0d211182b0891908eeb Mon Sep 17 00:00:00 2001 From: jkobject Date: Wed, 25 Aug 2021 21:17:24 +0000 Subject: [PATCH 1/9] sign mod on all components --- contrastive/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 6e00d28..03dfca4 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -295,8 +295,9 @@ def cpca_alpha(self, dataset, alpha=1): eig_idx = eig_idx[np.argsort(-w[eig_idx])] v_top = v[:,eig_idx] reduced_dataset = dataset.dot(v_top) - reduced_dataset[:,0] = reduced_dataset[:,0]*np.sign(reduced_dataset[0,0]) - reduced_dataset[:,1] = reduced_dataset[:,1]*np.sign(reduced_dataset[0,1]) + for comp in range(n_components): + reduced_dataset[:, comp] = reduced_dataset[:, comp] * \ + np.sign(reduced_dataset[0, comp]) return reduced_dataset """ From 45bdc34c043f63af89d91287b14f7e9b5f56a577 Mon Sep 17 00:00:00 2001 From: jkobject Date: Fri, 3 Sep 2021 20:54:39 +0000 Subject: [PATCH 2/9] making some changes --- contrastive/__init__.py | 1 + contrastive/__pycache__/__init__.cpython-37.pyc | Bin 0 -> 14588 bytes 2 files changed, 1 insertion(+) create mode 100644 contrastive/__pycache__/__init__.cpython-37.pyc diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 03dfca4..2af752a 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -125,6 +125,7 @@ def fit(self, foreground, background, preprocess_with_pca_dim=None): print("Covariance matrices computed") self.fitted = True + return self def transform(self, dataset, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): diff --git a/contrastive/__pycache__/__init__.cpython-37.pyc b/contrastive/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..822a340f24ab5608c34b2b1b9bda42f0d48a0d4e GIT binary patch literal 14588 zcmcgzTa0AKSw6SEU*>+_wcG2pciNZPwc{Aetgq{=nm1hGzbjk3r<2qAaV|UfBp6UU)8*r%cV8^%nOg%Ph8TpAMm69izD(p9{1CRrU^~x4Xvq{ zbe(adVU$d~&4$smN->pYHR8=gDWT%AMzWbIrF89OO~ggwElnhx^bNC=!FN)m@SSop z_|7^xk=`=iF=SU{N_mkjO^94+(lPg^oV+u!VI1hCX~(S3?9ZaMyptDsq?yjNFof}0 zgK3{NHjJ~{fF^erO@d)0tVg)^ZJh0B85B96Nrlkw`oolx|X}ezSno~eVVYPy=m{xhO)>@7&3!>I^ zS}umgZq&TJf^@`=8l{4_X?uko*KrF@t?IO{*^M2iAe>6O*=f5q4y0n^wF@=jw7lBJ z9(YMd6?VhYn>OmLwJOrFT}KqI*|KK0DojGYQ^2$|ZMj!i-DpdvD%(3PQB*vI?V*5& z#x!izc2r^)T-4nXs=&Hk*@_AjBI32e`L?K+K(uW2%c{2zVj z^k%#1oUS|4Y1W+66{pdtRa=f6jSVO1G&F*8}E~S z&W3nhbIJ{S-D$Y*(vc~4WQ_3mNTu_GK8=yQzwYHb6}vo?t#@g_G#fBZdGAOI76vUC zr+HsZD5axjqwKp>zc|u=)({E2Bkmh%#PLBRhWv(<@2e@nguSsr5#1{{ZBN#A-=#e> zsy*YR_m4DXaoCh`qKcApP?~sRc+|3c2oYcRw7Tx=LKntuV^PDG(f?Y}dWNU%n?CFS zEKT#G$Dm@TEfSDJP`n+iwN zx{fHTw8M*LYJ?=DIEBhw3xTE=^lm6^cM;uT&n};@C^|&>W|HdQF+_aB*UIQ7LftnJ zV%tFo^9fxtE}lm>G9z4rY^$ska|@c-4HDNJxehfL#5Zc5=ZK;euJ{5tONyeTC>-uf zvafI+L8hy0JUt%%B_dtZr@&b5cFX(n7zX|2)J|Xj@5H5=IY#S+~4QJO@LSv3r zAgnachAk|5LCoE>J5FG=${W=nz2Vqc?v7g)Oj)l6 zxnT-F)34Kl{g9MxDHq9zmQJV*X&(d`*j^Reb6R*LcN_T=45%rCnDQXFU&z3OFM-%j zGgxwVI}O!!G%qx5%Mu1_yBz4315-fj#L@gp`x+Fnddm~wR*Z+*_=9BsB-t+vN2h$f z=50pCLsKT0Yhn{Wq#NW1knvGG?n?+XL#b(#VP4N0Sv_ecb<14jw-JU}eO^!FZxMfS zBdMPOMbbQ>A45Ee^4(-#qf~Nai68eNdO%e0u7HNW6|2Le-!kO1hkaM+0<1^qhR1zF zSkXJiH}d0AKECBgeE9QxBRx4xcS~V8S@#nvzmEKr%2(fRH!Ame)NIh_cRvq`jH1Z6 z`%&qAq+!iKGsWHC^Aqx4{iM=R-|@_SrQJksNrQe!`pH`{Xt|Wi{XTs%`eS0c8bf-b zg&e;91XlGA_l}92(1=L94IdNnnMS&qc^lUM6|HrTrFq${H0U1Gk)q>E%(4YY-j*iC22pDp8(E5=P zz|24{Q1a=+{pii~W^X}HiU&oZYKT*}(c;}d^_KjZ8~RJy$G?8n=*{)!Z&~}xVpSCV zIkCE+9hji$cuSbi6+Fk#>-d3w0BwvCucO35ZxQLoneH!c!S3rVp^kg}d4Ead)Gol- zDr2BY_zT{>eBaRLHR$Ju#KT^}$Jj%+KXMzm)H}xZ7p`b%!+qjW_=)EJ34hj~17-RE zQjhka&7fJoqIvfY+jBz~tC$H>oyEG`)F?O4*Te1O@;kB+;$}c;cQwGBR?*coK1hWx7?xzz)F9|lFyDyDJWL_6;Mo$ z^nAw8^p^cBsIU4n{_+OU8h_>z_H68Sr!SuG`jH|8iOd7(Ai7 zKHT+A-@W4%_8bozRfDKQLGFZL1B$K|3vEdrL#ql&eOSmvj&!{>c@p|Ah&S!lj@<}M zyV2-gdWG;u4Lg~v?f@?F3aG!(u_ZQBY!-z>=IPUe9rqN?jeF2=g|nc=9vTFnY8~SPQ)TF#u<7jM8wr&3*f`D3p|h5)`0fXP!bA9vkUc|&K`vjW9qE8+gr#7aOduHI&JBp6`j4PYGw9y+Z=T8M(6fu z6uLmZxTat^bpWpwXnzzM5cK}+xtuTh^*X zS5=|43JeGJ>$@d0_;SC6V0XjQJbmB5qn!0fp@ z&kZ8ooV^Q?*`Q#+rlyeY?p4}tDcoWvNI6(Y5E@J~Emt6nnA#w|)2eOnI6=aN*@~2DU;I zPMv;B1GH}3FnY%B7h&FE+r?J@3vf6gFTUZKm_KV7ZW!P58s&lx;H|h1hpQL6MVAjo zUER6`iY4s^Bq1yrI)LCExrZnf_hS3;TMA#9(H6mHKXIV-61QLoM!ez?FM}!MwS7`; zoylmEPw}nfB6jf(hfNX$;=10bRb7BbYWj<5!ZB62D3}PlRdoP72vNq`q3)10ih$;0)fT9{0RaZLTfc9&CC##A>r{RV0#@t{^0e?{ggoPIFN{)&v{6dLM ze5V6?=iA`tvOAhSsi%#+eh&m@)>t;?@m`E2_3l0ICORR5?)=r&t3-D`_1Kx-r^40L zD_kw!`5*k`gJ2x3pg()AYd&@60gQPJ+Roc7#)FJ-+={GqxSj%&!)O94!svNUCzvA8 zUTf{NcieJV72u#q9m{R-y`yj*UDgm+%UsRrV6AuK=Nh)_7JwMONeZ=wVhWb-#$Vv- zf94JqUhzTY^2z5(RA3M;;C9P#kO`t`NS))?cr#owH9qP@(rfuDFxt-`K`dqgu-YvR7{>_$rnkJ zIgjnoy9D#ck+RO@t4vPx1^44j`UHbdGAJ{!8H75=t9cDT224oQ5EU-al-Enb7zZpnUOYImsOsy2gNuM_y+@?Iw2_>@iRp;R(cU;Bmi%K!YQW1%PKV zdN1Q0y6sGh<7?W8!!0+EI*GqLVzXh)nAN9A8_Q;w=S~`yq6%r!ie?)51<<~zpM=w& zRBk`HglW@ANg9?(+Bov20S9Dti**{^{P=RL5C-9oFaYD6QkRmW09|TX9IRi(ZrzLb z61@a>bD>w^w(=0Z7prQ$>+bE7p3Ytp|p<#q4F?wkNFM4-IjoGC)rd*TA_Q5vIZI5;oNIJS z6Y7*c+LR%pfkpS~@Z@k&+|S9GM?}K{ry5yQDk2oulX`bT3G7fCSNhgEg?k{Z|AG&- z;&E`$Kf=NM7!XU?)@nV+JZm3tH-y>Rj|=DpP=>;cS;PEb9$|yU>Pcvc)D7x|O%GPM znu)8u6iQ`q7=(ic*!i^K(!%u9TM1c$j!6T08JVYE#CkY;L^6J_4m#%2rZ$Y0xf7K@z&|0*7ifSFlJ_>j(CI$!BMyI9(j}``g--o9; z+#Qv1TAaa&w(V`oZzBJ&fc*i}Yy`j_)J+|4FI4|r8H&GL{jm@`N(Ag#L+YO^C3B`kGH>gDsr z%=z;Q5r82Pi3v2->)mwqz+87+~_C$s6PDa zws=#+!2&O4rf>lcz`DreBASJx1{|Xx785HxOhGJ;Lxx!~hgd@3!XK`uC2wjLWCYsw zc#yf^$d=P6bFQK(QP=Ts8BK1Y!V3GqAKx)Nlu);Tfakcl6ai?c3{`N0cRXk?m^>d) z?l&gS3gTOee-NwMP3)jCAYpJ^II{hNH}V>}2Ju~f{x$RUAXSFyhhv918xAVVUZohS zmMI0-ogL}LTK`(aQGzpmD}sGFhsV8)0Ig?#c=nFFEOp~5V>su=S-BB?v9C~W?it^d zPk9NRz;hcF2F}7>#Wyxk^xf)Zy`+ybFbr_)Vt1Pm-vFI<6)ZeJZihf^gdc|pbE^<` zh}Q(y3Rh~42G4bIeWEa85{ED#0C~FwC>&R06vpY;LVgt8>AuN&mEAb3eSluFwh$)( za7y_6uKEB*hJa!S>jF&-c|B8jJOofu!?WMecRq_+6=%4 z8+eo~aKU=m@Ob*C;gR{5DoF?e4)nkA` zBeNP`@cRH?KdsDY7|?_)TQ7Pkp18|5hN->@6UCU(kb4rjJa_kDRHVI32Itc-Vq(0% zBH}YJL=@}^GufK}#2N?Hq#v(OAr;1x&^_FJADYSD1a7ZD^NgmRh3?A0Ao3>YJ`HJS zHE){Kh1sdll}kv-Z!5O?Gs&o8bsCk*Z=LS z-!T8_?4ADxURqoUQ9Y$AhuDd-0;z)JcNr*?kH#O4KyftI7Vv0yJ8FLn(k_e}=|Foo zaU}*wB5uaRc1uS4s+R>~BdsTKy9$k!v) zpnL}%SO^RQEEIiosfCL!OjQC8!vR-d$ChJA6Q(K}D3_!8jL^X3IGWE$U2!6PJ_Q-q z^bn^ZrdQ#+1F_-NHhl#Mjt~51G_RjNnikR@MMoY`Xiz-w)o(1sunwFGMfC$|@C{YJ z74ePo`su3vG5VwE$b~zF)zxp;-Sl_Sq3`|iKf3>YSwR02P*8nDEEd{jR4&A%I(zbSh$@`| zoRKh+{Cfufh@fO#*u}F~GB4~h*xM~7FO)-da(ET~9q7NpRTx59c_7>2U)uP&@K5iK zZNLMqRE$MFhSHkK!T9kUKtu5hwQO2`o4xsS27keTp2A`OjT%I<13@lwF8EI%eFuZa zCCsqDA(1k({tB_5Wml<^;p@_4gPKVfC3EO@HgV+;9>*_%sK(pZp$kjb=?+a&)wDbG zbpaDR#RO)l*qy&+j9IluSo$3VaCwz4bQ#pj3Zem2T%lLHvBL|MXepIc5W^WXCRA-4 zs8y3+Wop9R!aW<=k_zsci@w&z{!d8VrVQfkb^hOtayM09!omAza?xMH{cO7<|3Q7c z7{qZvD%#B;P9=_m$pGo4a#)pFw sC~^E5lb_K(qy6;c&!lw=&Z&jdNhMMi-bO($sOKc!c=S~R>K)Jj0=EMPd;kCd literal 0 HcmV?d00001 From 39c7ff8f7f4c95920ed1800a634cd5629e0d69ce Mon Sep 17 00:00:00 2001 From: jkobject Date: Fri, 3 Sep 2021 20:56:21 +0000 Subject: [PATCH 3/9] adding only loading --- contrastive/__init__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 03dfca4..ef2e79b 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -46,6 +46,7 @@ def __init__(self, n_components=2, standardize=True, verbose=False): self.n_components = n_components self.verbose = verbose self.fitted = False + self.only_loadings = False """ Finds the covariance matrices of the foreground and background datasets, @@ -127,7 +128,7 @@ def fit(self, foreground, background, preprocess_with_pca_dim=None): self.fitted = True - def transform(self, dataset, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): + def transform(self, dataset=None, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False, only_loadings=False): if (self.fitted==False): raise ValueError("This model has not been fit to a foreground/background dataset yet. Please run the fit() or fit_transform() functions first.") if not(alpha_selection=='auto' or alpha_selection=='manual' or alpha_selection=='all'): @@ -143,6 +144,10 @@ def transform(self, dataset, alpha_selection='auto', n_alphas=40, max_log_alpha= #you can't be plot or gui with non-2 components # Handle the plotting variables if (plot or gui): + if only_loadings: + raise ValueError('The only_loadings parameter cannot be set to True if plot or gui is set to True') + if dataset is None: + raise ValueError('The dataset parameter must be provided if plot or gui is set to True') if active_labels is None: active_labels = np.ones(dataset.shape[0]) self.active_labels = active_labels @@ -238,6 +243,9 @@ def update(value): return else: + if not only_loadings and dataset is None: + raise ValueError('The dataset parameter must be provided if only_loadings is not set to True') + self.only_loadings = only_loadings if (alpha_selection=='auto'): transformed_data, best_alphas = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) alpha_values = best_alphas @@ -247,6 +255,7 @@ def update(value): else: transformed_data = self.cpca_alpha(dataset, alpha_value) alpha_values = alpha_value + self.only_loadings = False if return_alphas: return transformed_data, alpha_values else: @@ -294,6 +303,8 @@ def cpca_alpha(self, dataset, alpha=1): eig_idx = np.argpartition(w, -n_components)[-n_components:] eig_idx = eig_idx[np.argsort(-w[eig_idx])] v_top = v[:,eig_idx] + if self.only_loadings: + return v_top reduced_dataset = dataset.dot(v_top) for comp in range(n_components): reduced_dataset[:, comp] = reduced_dataset[:, comp] * \ From 51e84e698c6fe1c166f86341ea1c7e4eefea6a15 Mon Sep 17 00:00:00 2001 From: jkobject Date: Tue, 14 Sep 2021 14:12:31 +0000 Subject: [PATCH 4/9] making it much much faster --- contrastive/__init__.py | 27 +++++++++++------- .../__pycache__/__init__.cpython-35.pyc | Bin 8620 -> 0 bytes .../__pycache__/__init__.cpython-37.pyc | Bin 14588 -> 15218 bytes 3 files changed, 16 insertions(+), 11 deletions(-) delete mode 100644 contrastive/__pycache__/__init__.cpython-35.pyc diff --git a/contrastive/__init__.py b/contrastive/__init__.py index bd93873..31ec250 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -5,9 +5,9 @@ from __future__ import print_function import numpy as np -from numpy import linalg as LA +from scipy import linalg as LA from sklearn import cluster -from sklearn.decomposition import PCA +from sklearn.decomposition import PCA, IncrementalPCA class CPCA(object): """ @@ -41,12 +41,13 @@ def standardize_array(self, array): return np.nan_to_num(standardized_array) #stores - def __init__(self, n_components=2, standardize=True, verbose=False): + def __init__(self, n_components=2, standardize=True, verbose=False, low_memory=False): self.standardize = standardize self.n_components = n_components self.verbose = verbose self.fitted = False self.only_loadings = False + self.low_memory = low_memory """ Finds the covariance matrices of the foreground and background datasets, @@ -130,6 +131,7 @@ def fit(self, foreground, background, preprocess_with_pca_dim=None): def transform(self, dataset=None, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False, only_loadings=False): + print('transform') if (self.fitted==False): raise ValueError("This model has not been fit to a foreground/background dataset yet. Please run the fit() or fit_transform() functions first.") if not(alpha_selection=='auto' or alpha_selection=='manual' or alpha_selection=='all'): @@ -298,16 +300,19 @@ def all_cpca(self, dataset, n_alphas, max_log_alpha): If specified, it returns the top_cpca directions """ def cpca_alpha(self, dataset, alpha=1): - n_components = self.n_components - sigma = self.fg_cov - alpha*self.bg_cov - w, v = LA.eig(sigma) - eig_idx = np.argpartition(w, -n_components)[-n_components:] - eig_idx = eig_idx[np.argsort(-w[eig_idx])] - v_top = v[:,eig_idx] if self.only_loadings: - return v_top + if not self.low_memory: + pca = PCA(self.n_components, svd_solver="randomized", copy=False,) + else: + pca = IncrementalPCA(self.n_components, copy=False, batch_size=1000) + return pca.fit(self.fg_cov - alpha*self.bg_cov).components_ + else: + w, v = LA.eigh(self.fg_cov - alpha*self.bg_cov, overwrite_a=True, check_finite=False, driver="evd") + eig_idx = np.argpartition(w, -self.n_components)[-self.n_components:] + eig_idx = eig_idx[np.argsort(-w[eig_idx])] + v_top = v[:,eig_idx] reduced_dataset = dataset.dot(v_top) - for comp in range(n_components): + for comp in range(self.n_components): reduced_dataset[:, comp] = reduced_dataset[:, comp] * \ np.sign(reduced_dataset[0, comp]) return reduced_dataset diff --git a/contrastive/__pycache__/__init__.cpython-35.pyc b/contrastive/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 40e0bf3c58a57f378258d5c008c00368954e633e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8620 zcma)B%X1q?dhZ@Q34m{k4@s6`TQB&bmCN4R%|>h4kyeVcj@DUvqxFO;)DSZOhXe;; z&wwIfBB>O$D`_j0TMo$~IpnlexhJVga@k5%{)1Faa!8dADV}r7W%K)e4S)|zS%I7n zJ>6fwzW#l$#^U5;=~siFefdvsEA_QH@yVk6F`hU8@$s)!Td8fWT&*J-Z>Zg8sG@aleO0#UJ-tzeETIL*0mVhKy*C7kAYwKce<;lk1RB@$RZ zApuU&HJweYIDl3iAMb19RUD6Z%=v0-NvFMLlR(|CZ+nRe_g=D~d0EhD)|C@$Sw3FN zX{Lst9h@0FIXt3wzlFkqRtZ#3vtZ5Dq1MZ+a!yrU7&yY1Q3sAX)X>z-gEZ^yg|VR? z*RrV-b<^y2;6-jGPW-eKc~L#-)T7>ZEhD<(<~(D(eRnQSyvX;AANB*kE_G+sX!Zn) z<#-K~#90uf-_?DKzHcxcahqQ>xJiEa%^KP4A_`bK>`YyD&4Sozu{lm!J~3KaiRz8c zcDEA+Q4$ZAZ@2_=k%R{1gwhvBR_0CA4;MKxGA*OFdq>yNdc-vyaUTyFG!p#qdm4`{ z^K%reEyRIfy*QHtI0-Dgqz)ZNvkl9}F@R>(0h~ro9l%}W)d3tuK^?$F6x9K|&4fD0 zszX&+t(6|6h26lccVIp_Q(B-IjDrLx*h@NTF{*p*?uHkqliS{2z1?Zq_jGR5z>_pV z(lb$-?Y28fnrZdInv-UlVUkY3^hR;BW43E$Iq9^7m!M@jy$C>9_ZnNaY$yU#BcZ6w zWs)l-S4pmsaD8V5Jc(KA<4MRnl=4O0pA*cVVrUTI{N(YB(f$kI)ED6b0Kb2U;t2O&tPy^IBCu$&k@I&|#oop4A`GrbKYO^foTA=a_ zFf9JTR^b_$zf}pGaGp(az&YtKB^^c)0@ac|WL=xn%6u)&GeahHJ#NjZ%~`cMcd7{i z5>^dptN<9x7sxy>4A~QtAvBS2WHcFe4ZLBp&t+Y9GJwiXW?K<^jVBv5ZDN$sEDN&5 zpKERr7`6f2YOVYr)my1Hsa{WYE7gsD`7S_NZFf9B@D0^fEXxtiOaJ1X&aP)dFKPr; zF_DcRu2MyNFvqoXhOWsiuea(!*sAY(?by&$r1?gt-7&G5~0o%A6rGqNib-Z+5aNRc@Cn#NHac^Add+fo!vCwl%|6Pka^8PYj@PFFxDoV+e&-bBs0%zMM{t2(dy#S!Qm zG)4SSUR)*ZOZ+dedawy*UO&U>SaS#50CG5>ETQ{6r=DxYRvab%ZBs^?^6I&x_HPc| z0fXs`8Wh!bRt*qH=hOg!G+hM(?K}hRL0RdC(VWs+Con;R0sUp9)lbQl8;0M{VY#1X zRTEW!VL|;|splE>RH>(0J$2Mml(Xt-PCd=5reF{N0?z;%0$oCoUZR167TLgu%eY7Qm z%&Eb=a4ZZt4(8RCIM~6041P`>%&G%yO{=D(R)#NA@`8w-o57Cfpw$x3`x3V&ORU07 zONUNI@BEE^$Za7Js46sZK`ZKDnhRb*WPd30ErDI{xL`g1f+b^PTQF3f!+|b9CQ7q( zQA*Kgw1DO>euHQuyWDR8j)KQFA>V0kYn4Y;yZpe$6`8c~IP?+s#Ay*}Szr)lrTNdi z{Z21oKIVsUx9#os*S_dfdvQ=rHiGK?pM3gm6%5sFLkH>r?Z82FR8}8ZntBSxEgl%hf`ys;(ZhPtKFc{$U?*Tp>v`3&Etvx z2W0P086hR`4hWS%p;IE+(TRI<;?N>%!eKoTdx1~<2b{b;_t8PWu+6dOo>9+Wmf`WU zI6z4!85-=ot{&3qXpUrlC8k>f1861Zu`US?$euS`!4fh|=oK{T(9t@2_cwMC6Fy{@ zW16zZWIn|H)0|k_G$*k{>jU|GeY-z#c};3Af8SgHTR&gDj-0FZ-+%R&{QUB^Tin}k zbUMb5`;*=t?0S=jjMTg4n7im8-k$XjVyqd2r8-P?`&0}r$=yuLc-@WqacuJl z%y9`s>38&(FxekC{p(_y$HB^tTkTE*{^+9{XFE6O*!Urcm`e(f3aDlD4Ks;<>~mjGRWLROteqb)7ce&B78RnNp3tN@75 z#P9=rG$f$HyaFGg8RZCqanAVv8D_%VQWy=CS4m#M)jvao!7#xv&mc5R&oMV>j=a)^-@Vl_YF!B*=ehMgX3T&f{7!%^`5; z1ukORmg}@)^BHDxGv40&f)AFy?M?`k>!H6_E0|Bfkd|%eT$f0Ca`>|CMLn-=9&o`t z+w|NR5y*KMeOvyeZ&VL^5){T>DfGJF4?uG0RU(+Rh-nrtuhVM(clI~x6 z8S?A^<{|pJWj}};ChStb`ipDV*BI=7_5RJlS5`>})wNohfsCw5XOIt=Dmp?)Nzk(l-F!AHrB(0%#{QdHsfSLHDOl19zOhF<_^cC1&D30bw!%YHb$07~)lNr-MB0 z4gRh3h2?C)kmcwsY4DKE!5|gY-UqC_`{*48vK$K*5~c`t79-LEIS6eTWWspbOl9XD zTq+Ve8EUnb9eO)CE3T@ROBZl)1ET}vK>TA#ipeQMMc{~nh)$TX5kB=TJRtB3@<`zg=5tD%=oNyH-O6SiWH}d(jIYtA0{~AwB1yuSCNI$P9L1%S; zUZj3nsTx`G;Y0H?7X3C}#y$y>Wf>;lW)yM^LqTUn&^Zxw)(RSC-wGNchM*x?q`43@ zE-H|a;#Cr2@dKzpK!c%HYxxslg%uB_ZPr0*IjeX0JwT;JSg*M2H3GvmnuzVi6Z$lk zXPUQKU^?WJ!MZGonH3_4hTBnphbOLpD45u>v}QzFCrpmm$MIDY-BS(^M~;6DVxh`J zAKM2Q#Xmx73(N4oNk+CVnG#b8$ctBc@?|$-!(xMxug+Z}QwaRb@lZ<8BB&G5G0j!ozO# z-4efxln}cy*0BNou>2t^Y9)K8_S2&JNIY2(gA*OKzASgF%iJQ zZ~=u*6Qeu>2MBXVjV!4xToydxfGDqwp@J8|Q(3wr5l8Zk6>(b4yu2j!lg!QqjVQr* zB9FrgCj}mQ=LzCF%#CDIz{n`XkP#l660-vqxs!1V_GgiL znr(;36js?lw~oA_w{1g61Vywp;{0n<7KCG4W8L;H+TG&FFBrpD#7F$W#fWhiKOC}p zZoFnh7WM}KcM5Zj_7VoMTM71pZDhKJSUzjEwlK=S;fXJSC_TfABp9zrd{5|kINpjr z4;0Vi>8}WSU*xpL#*B%`7{|CIAEqL(xcCJYCQXmwOPB)6I8Go-tQ>qYPLyT`W-iBA zp0JivXQjZ0UBEB*oeQvSdk+YTyN~YCNmFNViv%->0_+yLGNEt?V)U~YsX)4xY`VnT zCu)Ge5tVwUsUGe<=Y^`B;O?WZc!{)Uqzd_NMlG?$!Gw<#ZiZ-aE6@et7A4@8w?K9# zR80}Po4EL#=9HWjmtk`CGr)}xE}me9`2{PwK=r~pCMkhiJO8d8?))>t2UHd~$P(id zjEn0W@p;%^9lw+^(J5MEiE9R@8;Z#!Lh$*o+b3+B0Da!g0wEiwkJhv3-D6!7#4RFj zMRcC*TfaETh722=9b=J8RO5%5UEh3{jkKv|g@k6WHg6$)l@)?(nNBb~0gQ^vjm@{% zOq`4Dh1sPiPW6tt$vVOJ21=OpCZ6|G-Ai?BdTc!7cORqfKk>wGf)K#h0Ne@yJBf&r z(aJmkT*BWqeHnHA3)Td`P~&S0DSnWNyzQV~PfPXscE|6vS+3OUJ4niGi=oFhbn}L0 z+su&6kr4gn9LaeSs+74w@-_)$UitOk;N+AQFV)X*3n#X998)kXpf<_gDo!Oc{&OnX zN^UNDIXidd(vK=rmCE!?WxviLx%FW@%qq}cCAxP0U_}c&iD1ss>Zd24~0LKavq(}?6 zKhnDA+*!U>v46Bn%s1zrd+zJp^BVqc?zanxlks>&f?wrF4;#lXd_K|BctMsVDp6ud zWs)TXY1xvq3cM97P!48Q!3$WSvX<2Z9kjybNH$WAW}`%URH7;ky(rO;8CzDeacFBa z4DGNPhjzkjqmfzprp%tB(QJ~&vh6gUO_|DChnY0n^9^~OWYeZn>RjuBB@ns&$7GvcP!aF-sboYnr@D7>^Pg5qPqoJC+IX&X=zI)O}jpH6Mp}P&(SqRyYHN2 zdn4X)`N6|IW}NoW-c^$AHG4~aW;a|;pV>qER|7;c`_1msz%p14Q0dZuwY^eTaJh7(6@B%7kvN7EDLa z7y7-ogPq=a^&@wVd{v#hlQ|T+yoX1OoKsvd^;*uz5Y{@o_iE_*I|m-t{&Wl_dC*|Y zSmG($F)Gwxw0O;=y5Q~FcQ}0N(ICj<58vQi(kXcCg8;7VO1k6{K;kNZGDTVu7;pnX z2dQ*U%E;4G8P!_}AG*Zj)rz>UCP=D+QZJtm;+0Y?iBRDHHV1uoq@#qX_#D4j}AM zB{aFF>%~gZ(e*>nvJ(IjQHkpPD*O+m-TTYP$wnKnG6^vukL0V&EU@Z)h4RRZk(>2t zb_`_Ly$It7M-c8oIEv7Ya38|`2oE4Uh|sc^cpf(5Fr#9_GUbOr!VX9q&qbeT50~$O!Q_#&~L0>Q|n|0%CxKS4OK9l(9p*|%05ya_-f#T6zwURR&vjPvp z3t-0)_IdxF_+)B7kQvq28ZOKp_Rh8q4rxGg1+1cFt84-~&U!DmU1$iEL2D46C_D;0 z_ICo3!Z|Dy-`=QM~<*HP}H}J#T ze0#mn-E+=rCE)=1?@J`)pel2^7S>D!*%}u$9Qa2r;qjWFbM`brj_tU#p2|ci?(u1pVy#rm@Rc{8^@KSQ8 zK6dCpW>0Kwe}@iy+qtQ*8UG-S?UU5g_0H>oQV-40y#<-xy$Z&+_&ulBO)rzv(xq=d zBiFm?-PZ$aeRPZ-a=YkQx3sQ+OwH+qTkVHu0L~p+C+lFiFybi~(No91LFBu=vyd0- zeXz!m+wJzzWM1xVNP-*#5-nYGdz?M^UM5{quS5^iac9`=Sti{QJ+c}k(oK2p`xhkG z#0b5|8HTAx-A=a)B%|vxO0C1DLFVNPlCx)fL(7DY!DT4yB8=R-4weD$efVxv{@k}` zP$uktZ9m*2OoitJO&q01w>0tpIpqf*vhn**sm!fypVEzPo~(8E!Vcm$1NMX~+zk#f z0SGXLI9#-cZ4+*`I((kZo7Ya`2x>S6Fuj{y{X@gW{O||MZ@k`R$mE0< z?>z`>KG6FZ>GK-Bqt9aY;EH8dxMox;RR?4EPN5i! zN5yJL-Os~@Rl8#7MY@>LSl0WC{^4j@h}A5Ig}rzB?@NhPCze;|YoHJFZ@fbT&yq3k zvjZ0{UIpRXm`Gr=Rd}%z2)LiZz2f)t6W7#JmSNk&8HMTCc^QBQEn~*CuBnr_{3Ewj z;9dK8i^88oh3~pDydqHH&TGmXV2|Ofm5_{JB;owl?B=$w8pi*(pSw(6xR>L7P$PU@_ zGiVMhhb)SaM=%iz2>vxYMaME(5gDS0SOAJga*&#Jp?r21FfxOF20x5qTa}3S?s)Q~ zC{8}G5W&H0_hHzx$a?{R2d)-rfqQQZjf!H3p$RYxdla~G9`kP>3Xype$}q#S{4Lp{ zXh4fV`8vXL2xuCtfq>;BdmiDB5zst@=e-1k{Vf2AXe0tp4?IbD29fB4Cq^_SLIObf ze@Q6XB3^!v(T!q2leEATQRNuOsoqcayrK%dT=pu%0}W9jJ&Fr{8Gx%st!7qeCb~m` zN3nOilzSDM>nKcg9z&PbovJSG?lN*awkkI$f14iPS~h(V1k(f#=Y>bU)L4dAMrQu5 zkPfn~ZE?-27VMglGsOnk*RWsPvAeB}!QRpQZT58#5q`nGfzbi=4cQ( zt^%czR8p~+FNM-l4`j2Fc207_rO0kRd>2ElN43=kR3P<0J?KQ&Vp!#ul&G{W7K%bF zehHFlU3FFHjYF^63l-)%5j^-BzJt+9_~cn)|^8;6M_HO!Eko;*!)$OXaU@{)aRclMd zcR_J*fvY)UC}Xep680WKr;%DRzdbED>|lb@L2AOnVZ$~__<+nl`{C?VhsjY zB4yfz`}!X!L16gdM@0%NZT1AhSpcl!%Qc8d701@O{E0~(fRBJH!fwD3S)pby2o=R@ zg@>SJSDC}6aqt`t4%o#)B@@|UMUBhY`6=uqS2wZH1HiE@zlARnFH^#PFeFdBtH1u+jbQ8O%P!d-jn+~8hA)Qe4fE^ zDrle#Y4K~w&H*$(<90Z;@bSMwVa;wX!fuJNBDL5|c)bXCUqN=t$cqbH(jjh_4TlvM zdBUDA>&VZ*%@!d9#>_>t4Dp|#B3o7URhZ`VM(nN<5&n-M9)541*ck{!{>yam0IYC@ z{RsM7&-=H?!?9e0gi?VkU-IKb8+ItzA=foO2vkF;IMd;qgZNw( zK}D=5ln{nPiWp?GPvcNj6uS(-gM!V;9D53RLP4)#hdg%=JV!a1=fq~;!_JmK^d|5w ziWuk|JRJ|P5on*En#x3{ro1=D)6r@8O=ZY*CO(br*wmEZ0E?aQNao%W1~7)z0!t#K z5Yh-;2t5eH2nP|wH?Hs@Ea?~qU=a#^ZQ*6GHxafsDX1v&Lf&hck&Z;9Gx9*BAtkkh z7K?OgY2g1j^G8X4-(0nd6%+p3QSmO_{q!WRx%nNv7v~s75dVGn8B+g({e*{gJ%{Ct zuKNp%<#4R!y?6J1@2|%@ydz`pkaJ$wA-#bMWx~zRdz@qv;UvN-gwqHgLwF1UeR0#! zA40~%2>@y%#=z8iGul2%$W{Ov8d=eFhT7JjAT;;%veb1NIzqAdFhY8pM53 z0$mn!Q~1x4Y4TvMYUc0ZWSWyKCuex@jP4XIuAGBe$t3qacWlqpEDnfg^%|>|^jVWt zOpC`a?ruK4v)OtlXe7%U|8Z~jcsVyHI$6X|ymehI+kGmG$mhR-Y_ zEG=^FjGlw&0~Kw{E&T%IzwIqOFp);e?mYPX?1Bl3d8kyyy;ar6H3sgXW_mea{Ma_32Wp)&5~vvq>)505{oDir4h+6 z?81KtmW^Ts{w?@)M2jd0p@N_w{s1&Ws8S_L{@@=}36K!w``+5|dK35u ziM8f8?|tuk$9KQ+UoZS~B61`g4hZm*j_=S0PdyiDtvoFX0u?ARggj9R!M13KsswMz z5c3|@%V{1%&ihm!Z+i_T?^pdqxL=?$^<5UIPY=vUY7qJg^+VsU2caL*!!$51UKiOV z8dM`Rq&Cp78r7xwMm?f8jEW0Htu*OUu6aHN+eP#UjQ~;7o2W>|Dmw9Toj^nD8x~=2?aqB2;f}_RZP|a|(J8ZjZyk^TK@s{T|(^$>? z%93n)l&)r|Um&hWW5O+-7;ULpWJN#AYgEVo`x-4YaYbyemt^H2&LQ#cEiHPGw$ipa zq9*i2u2qkNaoY73+A-%Lg5IXbbL}(WHPC9xwL4!F_r!=RJ$m$T8og}!7kphhAQHcG z(0e1Q0Fn7`LBRR5_mSI)_si<-#Mk7B+lhDip6pQrTH4B9&{L(fmL%-9Enf3|?DmO= zmDe^Sr|Z=i)27uX)6xo5V>Ek7rzuWbb;$kyTSy5u0VK8r zC{k=g6rB>1;>n?;$N7qX;GU$XdbnPg!g>(aAk?S>x~`H+WeZs=l^TGa$>Y`89{4qh zIwW{Ri2yMfVTyA+*x9oMr*B0_B5Xp~jDS;F)Y-D5wYzTh@F*;8F|A;Z7FmAJ;+w%? zQAq$hXs*}kh&D{Y80M-s2kC5FWK9;K;tcr&u|+D4i{@SM_AI@<_=a(L zX&n0OAsdnzzJac+&`q^EkL7G1AJ4(Kzrq!U8EPwsO^*l5Ia9eHY^s!oHCM8EH84^JKqd`?**^-EO^d|*W_yi zXfdE6MFNWNnh2VS)7Ehh9OcVa3vI)kG3y;Aw4HWbR<232!Yr8;XQf%stanzP^^GdD zlXhKJ%8ho!ZnVR>1ns6hw3n`dxN4w%w6b=N&~PYEEP(IBf#B4@Gcqh#Ju3&AA+#TiA+hZ+bL|3n2E5ndyYT(0yTaH9ppo@p#^8d8 zVuB{N(yhyysQh1~TozKAm|79ihR8}d_r=;PJg%>M7^r!*6Qb?xSh>A`F%Pi~JO@`h zC2(}u9l#N-k+RtimfP(RaG$fA?e#gien_}5@S_0!%E;rGfXRur670f89w`!#myb-N#jNsOcK{O zn$4I{!C1GWb@m01;c|SlwlybfnlX!hY)%Zu+q4mVnH|AKJ0T@xIP>6 znpHB2mh;E%esbFJ^>hV4iX4w23_II;;*~QvIigKkMc{>^o`#A7CNK=%V*8Qv2=WAn zjG|_pGO|=>EQjQCNcNeNBe(^70!il)o>`4KFoiIUfOchr05kBIp8*g^fGDIDp6JrM9o`|LND8OPfK8$-%0vWe z6#gR6id9>p6J9ga5P+@>%QO*L3=xl$Tl0LFOOK{cdfk|_&>wq*o0lC15_=xNm9hjiSyF>aODAH8%H*enavbd85`}w7jwNZme*Wz(IppDAKpaKzr7d0+9kDt z>2xv;5smgYb<6dni}_N$4bY|5q77Q0A%HugpSCL8Q3E8hxNXC3qHwPuEaBugc9XdC z%*L)tD;|P7n>C3;pj;R}08 z&zB61p>pcQe-Tz0T(tUggu6b*Km6Xkw>AcNu1+?)9@t=vy$a(iF~PX>-$I%O;3|+E z3t+_QYKZXkgRbm~6RavZH>zY_pzMB}#SPs7tqT8uNH$k>!o!#=hVO?;Q2ihy4?mMV zf>TjZOI3oCT{6KQMHlehJAc@8>_LfjL;v(p zGB`BE)*<_PgbfHA0ZzhiC`nF&Ki*+X;>C9hnRR!Me#_A2!BrZK&UwshzE8D zDuPdmD4{^35@WJ+VBo1}RZdqnvIQN!W5=Ag2hJYGgDii6^+LPm^Ycg8{V|Rc*PlwI zu~es0)zk1Z;M{Z0$d>iddd)a5Zn;TLI3M4dqPT0)zx0)eEAB(!HTD3u4kFx(fF+lU zz8odHkgywJ3&K`}B*G>H?k_9fv-Jrcm8&_WZ9m&~2wKZu*egpO$2ICa18tFa#%$K}OmOG#avW6?2%4S@0Satp}*fWG7sD{-Pe7WaR<2ozoh8jMzx_h>^Td#f0 z%jV+wU7MxHxsRZfn88ca?32*sc>{{?@3H+$gkJ%u;_)eXrd8?q6vFhBsvJ+HAtS++ zt0f=REEcK!%Kmc z*5UfO6kK?0xZ(c zg`(Z$nFlWzH&JTH@_Ph`4K6k`f&4ZAmmoWU)2{_C=MD7_A5u;UmtmeOvzxQ%(&d From 8c09bde1b1a69d1162628ea581c490d482d8ad3c Mon Sep 17 00:00:00 2001 From: jkobject Date: Wed, 29 Sep 2021 16:03:29 +0000 Subject: [PATCH 5/9] ignoring eeg. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 25be81e..5bfb1b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ experiments/MNIST_data/* -experiments/private/* \ No newline at end of file +experiments/private/* +*.egg-info From 330b8d1ac006cbd93e451c0fb96143b51b18a365 Mon Sep 17 00:00:00 2001 From: jkobject Date: Wed, 29 Sep 2021 16:03:58 +0000 Subject: [PATCH 6/9] ignoring all eeg --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5bfb1b3..49a71d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ experiments/MNIST_data/* experiments/private/* -*.egg-info +*.egg-info/ From 29ae7bcde06d9506c62f379dc10c68f2243b3046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Kalfon?= Date: Wed, 29 Sep 2021 12:05:20 -0400 Subject: [PATCH 7/9] Delete contrastive.egg-info directory --- contrastive.egg-info/PKG-INFO | 11 ----------- contrastive.egg-info/SOURCES.txt | 8 -------- contrastive.egg-info/dependency_links.txt | 1 - contrastive.egg-info/requires.txt | 3 --- contrastive.egg-info/top_level.txt | 1 - 5 files changed, 24 deletions(-) delete mode 100644 contrastive.egg-info/PKG-INFO delete mode 100644 contrastive.egg-info/SOURCES.txt delete mode 100644 contrastive.egg-info/dependency_links.txt delete mode 100644 contrastive.egg-info/requires.txt delete mode 100644 contrastive.egg-info/top_level.txt diff --git a/contrastive.egg-info/PKG-INFO b/contrastive.egg-info/PKG-INFO deleted file mode 100644 index e7bcbf9..0000000 --- a/contrastive.egg-info/PKG-INFO +++ /dev/null @@ -1,11 +0,0 @@ -Metadata-Version: 1.0 -Name: contrastive -Version: 1.1.0 -Summary: Python library for performing unsupervised learning (e.g. PCA) in contrastive settings, where one is interested in finding directions and patterns that exist one dataset, but not the other -Home-page: https://github.com/abidlabs/contrastive -Author: Abubakar Abid -Author-email: a12d@stanford.edu -License: UNKNOWN -Description: UNKNOWN -Keywords: unsupervised,contrastive,learning,PCA -Platform: UNKNOWN diff --git a/contrastive.egg-info/SOURCES.txt b/contrastive.egg-info/SOURCES.txt deleted file mode 100644 index ca6e95d..0000000 --- a/contrastive.egg-info/SOURCES.txt +++ /dev/null @@ -1,8 +0,0 @@ -README.rst -setup.py -contrastive/__init__.py -contrastive.egg-info/PKG-INFO -contrastive.egg-info/SOURCES.txt -contrastive.egg-info/dependency_links.txt -contrastive.egg-info/requires.txt -contrastive.egg-info/top_level.txt \ No newline at end of file diff --git a/contrastive.egg-info/dependency_links.txt b/contrastive.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/contrastive.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/contrastive.egg-info/requires.txt b/contrastive.egg-info/requires.txt deleted file mode 100644 index 712aa4a..0000000 --- a/contrastive.egg-info/requires.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -sklearn -matplotlib diff --git a/contrastive.egg-info/top_level.txt b/contrastive.egg-info/top_level.txt deleted file mode 100644 index 3ed9ead..0000000 --- a/contrastive.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -contrastive From d9fc12dd122e1f4776f6f355cd1c952ffbb1d68e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Kalfon?= Date: Wed, 29 Sep 2021 12:05:59 -0400 Subject: [PATCH 8/9] Delete build/lib/contrastive directory --- build/lib/contrastive/__init__.py | 458 ------------------------------ 1 file changed, 458 deletions(-) delete mode 100644 build/lib/contrastive/__init__.py diff --git a/build/lib/contrastive/__init__.py b/build/lib/contrastive/__init__.py deleted file mode 100644 index 5ec19a1..0000000 --- a/build/lib/contrastive/__init__.py +++ /dev/null @@ -1,458 +0,0 @@ -### Dependencies are: -# numpy - for matrix manipulation -# sklearn - for implementing spectral clustering and standard PCA -### - -from __future__ import print_function -import numpy as np -from numpy import linalg as LA -from sklearn import cluster -from sklearn.decomposition import PCA - -class CPCA(object): - """ - Contrastive PCA (cPCA) - - Linear dimensionality reduction that uses eigenvalue decomposition - to identify directions that have increased variance in the primary (foreground) - dataset relative to a secondary (background) dataset. Then, those directions - are used to project the data to a lower dimensional space. - """ - - # Getter methods for various attributes - def get_data(self): - return self.data - def get_bg(self): - return self.bg - def get_fg(self): - return self.fg - def get_active_labels(self): - return self.active_labels - def get_pca_directions(self): - return self.pca_directions - def get_active_pca_projected(self): - return self.active_pca - def get_affinity_matrix(self): - return self.affinity_matrix - - # A helper method to standardize arrays - def standardize_array(self, array): - standardized_array = (array-np.mean(array,axis=0)) / np.std(array,axis=0) - return np.nan_to_num(standardized_array) - - #stores - def __init__(self, n_components=2, standardize=True, verbose=False): - self.standardize = standardize - self.n_components = n_components - self.verbose = verbose - self.fitted = False - - """ - Finds the covariance matrices of the foreground and background datasets, - and then transforms the foreground dataset based on the principal contrastive components - - Parameters: see self.fit() and self.transform() for parameter description - """ - def fit_transform(self, foreground, background, plot=False, gui=False, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): - self.fit(foreground, background) - return self.transform(dataset=self.fg, alpha_selection=alpha_selection, n_alphas=n_alphas, max_log_alpha=max_log_alpha, n_alphas_to_return=n_alphas_to_return, plot=plot, gui=gui, active_labels=active_labels, colors=colors, legend=legend, alpha_value=alpha_value, return_alphas=return_alphas) - - """ - Computes the covariance matrices of the foreground and background datasets - - Parameters - ----------- - foreground: array, shape (n_data_points, n_features) - The dataset in which the interesting directions that we would like to discover are present or enriched - - background : array, shape (n_data_points, n_features) - The dataset in which the interesting directions that we would like to discover are absent or unenriched - - preprocess_with_pca_dim: int - If this parameter is provided (and it is greater than n_features), then both the foreground and background - datasets undergo a preliminary round of PCA to reduce their dimension to this number. If it is not provided - but n_features > 1,000, a preliminary round of PCA is automatically performed to reduce the dimensionality to 1,000. - """ - - def fit(self, foreground, background, preprocess_with_pca_dim=None): - # Housekeeping - self.pca_directions = None - self.bg_eig_vals = None - self.affinity_matrix = None - - # Datasets and dataset sizes - self.fg = foreground - self.bg = background - self.n_fg, self.features_d = foreground.shape - self.n_bg, self.features_d_bg = background.shape - - if not(self.features_d==self.features_d_bg): - raise ValueError('The dimensionality of the foreground and background datasets must be the same') - - #if the dimension is higher than preprocess_with_pca_dim, then PCA is performed to reduce the dimensionality - if preprocess_with_pca_dim is None: - preprocess_with_pca_dim = 1000 - - #Center the background and foreground data - self.bg = self.bg - np.mean(self.bg, axis=0) - if self.standardize: #Standardize if specified - self.bg = self.standardize_array(self.bg) - - self.fg = self.fg - np.mean(self.fg, axis=0) - if self.standardize: #Standardize if specified - self.fg = self.standardize_array(self.fg) - - if (self.features_d>preprocess_with_pca_dim): - data = np.concatenate((self.fg, self.bg),axis=0) - pca = PCA(n_components = preprocess_with_pca_dim) - data = pca.fit_transform(data) - self.fg = data[:self.n_fg,:] - self.bg = data[self.n_fg:,:] - self.features_d = preprocess_with_pca_dim - - if (self.verbose): - print("Data dimensionality reduced to "+str(preprocess_with_pca_dim)+". Percent variation retained: ~"+str(int(100*np.sum(pca.explained_variance_ratio_)))+'%') - - - if (self.verbose): - print("Data loaded and preprocessed") - - #Calculate the covariance matrices - self.bg_cov = self.bg.T.dot(self.bg)/(self.bg.shape[0]-1) - self.fg_cov = self.fg.T.dot(self.fg)/(self.n_fg-1) - - if (self.verbose): - print("Covariance matrices computed") - - self.fitted = True - - - def transform(self, dataset, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): - if (self.fitted==False): - raise ValueError("This model has not been fit to a foreground/background dataset yet. Please run the fit() or fit_transform() functions first.") - if not(alpha_selection=='auto' or alpha_selection=='manual' or alpha_selection=='all'): - raise ValueError("Invalid argument for parameter alpha_selection: must be 'auto' or 'manual' or 'all'") - if (alpha_selection=='all' and plot==True): - raise ValueError('The plot parameter cannot be set to True if alpha_selection is set to "all"') - if ((alpha_selection=='all' or alpha_selection=='manual') and gui==True): - raise ValueError('The gui parameter cannot be set to True if alpha_selection is set to "all" or "manual"') - if ((gui==True or plot==True) and not(self.n_components==2)): - raise ValueError('The gui and plot parameters modes cannot be used if the number of components is not 2') - if (not(alpha_value) and alpha_selection=='manual'): - raise ValueError('The the alpha_selection parameter is set to "manual", the alpha_value parameter must be provided') - #you can't be plot or gui with non-2 components - # Handle the plotting variables - if (plot or gui): - if active_labels is None: - active_labels = np.ones(dataset.shape[0]) - self.active_labels = active_labels - if colors is None: - self.colors = ['k','r','b','g','c'] - - if gui: - try: - import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec - except ImportError: - raise ImportError("Something wrong while loading matplotlib.pyplot! You probably don't have plotting libraries installed.") - try: - from ipywidgets import widgets, interact, Layout - from IPython.display import display - except ImportError: - raise ImportError("To use the GUI, you must be running this code in a jupyter notebook that supports ipywidgets") - - transformed_data_auto, alphas_auto = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) - transformed_data_manual, alphas_manual = self.all_cpca(dataset, n_alphas, max_log_alpha) - if (self.n_fg>1000): - print("The GUI may be slow to respond with large numbers of data points. Consider using a subset of the original data.") - - """ - Handles the plotting - """ - def graph_foreground(ax,fg, active_labels, alpha): - for i, l in enumerate(np.sort(np.unique(active_labels))): - ax.scatter(fg[np.where(active_labels==l),0],fg[np.where(active_labels==l),1], color=self.colors[i%len(self.colors)], alpha=0.6) - if (alpha==0): - ax.annotate(r'$\alpha$='+str(np.round(alpha,2))+" (PCA)", (0.05,0.05), xycoords='axes fraction') - else: - ax.annotate(r'$\alpha$='+str(np.round(alpha,2)), (0.05,0.05), xycoords='axes fraction') - - - """ - This code gets run whenever the widget slider is moved - """ - def update(value): - fig = plt.figure(figsize=[10,4]) - gs=GridSpec(2,4) - - for i in range(4): - ax1=fig.add_subplot(gs[int(i//2),i%2]) # First row, first column - fg = transformed_data_auto[i] - graph_foreground(ax1, fg, self.active_labels, alphas_auto[i]) - - ax5=fig.add_subplot(gs[:,2:]) # Second row, span all columns - - alpha_idx = np.abs(alphas_manual-10**value).argmin() - fg = transformed_data_manual[alpha_idx] - graph_foreground(ax5, fg, self.active_labels, alphas_manual[alpha_idx]) - - #if len(np.unique(self.active_labels))>1: - #plt.legend() - - plt.tight_layout() - plt.show() - - widg = interact(update, value=widgets.FloatSlider(description=r'\(\log_{10}{\alpha} \)', min=-1, max=3, step=4/40, continuous_update=False, layout=Layout(width='80%'))) - - return - - elif plot: - try: - import matplotlib.pyplot as plt - except ImportError: - raise ImportError("Something wrong while loading matplotlib.pyplot! You probably don't have plotting libraries installed.") - if (alpha_selection=='auto'): - transformed_data, best_alphas = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) - plt.figure(figsize=[14,3]) - for j, fg in enumerate(transformed_data): - plt.subplot(1,4,j+1) - for i, l in enumerate(np.sort(np.unique(self.active_labels))): - idx = np.where(self.active_labels==l) - plt.scatter(fg[idx,0],fg[idx,1], color=self.colors[i%len(self.colors)], alpha=0.6, label='Class '+str(i)) - plt.title('Alpha='+str(np.round(best_alphas[j],2))) - if len(np.unique(self.active_labels))>1: - plt.legend() - plt.show() - elif (alpha_selection=='manual'): - transformed_data, best_alphas = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) - plt.figure(figsize=[14,3]) - for j, fg in enumerate(transformed_data): - plt.subplot(1,4,j+1) - for i, l in enumerate(np.sort(np.unique(self.active_labels))): - idx = np.where(self.active_labels==l) - plt.scatter(fg[idx,0],fg[idx,1], color=self.colors[i%len(self.colors)], alpha=0.6, label='Class '+str(i)) - plt.title('Alpha='+str(np.round(best_alphas[j],2))) - if len(np.unique(self.active_labels))>1: - plt.legend() - plt.show() - - return - - else: - if (alpha_selection=='auto'): - transformed_data, best_alphas = self.automated_cpca(dataset, n_alphas_to_return, n_alphas, max_log_alpha) - alpha_values = best_alphas - elif (alpha_selection=='all'): - transformed_data, all_alphas = self.all_cpca(dataset, n_alphas, max_log_alpha) - alpha_values = all_alphas - else: - transformed_data = self.cpca_alpha(dataset, alpha_value) - alpha_values = alpha_value - if return_alphas: - return transformed_data, alpha_values - else: - return transformed_data - - - """ - This function performs contrastive PCA using the alpha technique on the - active and background dataset. It automatically determines n_alphas=4 important values - of alpha up to based to the power of 10^(max_log_alpha=5) on spectral clustering - of the top subspaces identified by cPCA. - The final return value is the data projected into the top (n_components = 2) - subspaces, which can be plotted outside of this function - """ - def automated_cpca(self, dataset, n_alphas_to_return, n_alphas, max_log_alpha): - best_alphas, all_alphas, _, _ = self.find_spectral_alphas(n_alphas, max_log_alpha, n_alphas_to_return) - best_alphas = np.concatenate(([0], best_alphas)) #one of the alphas is always alpha=0 - data_to_plot = [] - for alpha in best_alphas: - transformed_dataset = self.cpca_alpha(dataset=dataset, alpha=alpha) - data_to_plot.append(transformed_dataset) - return data_to_plot, best_alphas - - """ - This function performs contrastive PCA using the alpha technique on the - active and background dataset. It returns the cPCA-reduced data for all values of alpha specified, - both the active and background, as well as the list of alphas - """ - def all_cpca(self, dataset, n_alphas, max_log_alpha): - alphas = np.concatenate(([0],np.logspace(-1,max_log_alpha,n_alphas))) - data_to_plot = [] - for alpha in alphas: - transformed_dataset = self.cpca_alpha(dataset=dataset, alpha=alpha) - data_to_plot.append(transformed_dataset) - return data_to_plot, alphas - - """ - Returns active and bg dataset projected in the cpca direction, as well as the top c_cpca eigenvalues indices. - If specified, it returns the top_cpca directions - """ - def cpca_alpha(self, dataset, alpha=1): - n_components = self.n_components - sigma = self.fg_cov - alpha*self.bg_cov - w, v = LA.eig(sigma) - eig_idx = np.argpartition(w, -n_components)[-n_components:] - eig_idx = eig_idx[np.argsort(-w[eig_idx])] - v_top = v[:,eig_idx] - reduced_dataset = dataset.dot(v_top) - reduced_dataset[:,0] = reduced_dataset[:,0]*np.sign(reduced_dataset[0,0]) - reduced_dataset[:,1] = reduced_dataset[:,1]*np.sign(reduced_dataset[0,1]) - return reduced_dataset - - """ - This method performs spectral clustering on the affinity matrix of subspaces - returned by contrastive pca, and returns (`=3) exemplar values of alpha - """ - def find_spectral_alphas(self, n_alphas, max_log_alpha, n_alphas_to_return): - self.create_affinity_matrix(max_log_alpha, n_alphas) - affinity = self.affinity_matrix - spectral = cluster.SpectralClustering(n_clusters=n_alphas_to_return, affinity='precomputed') - alphas = np.concatenate(([0],np.logspace(-1,max_log_alpha,n_alphas))) - spectral.fit(affinity) - labels = spectral.labels_ - best_alphas = list() - for i in range(n_alphas_to_return): - idx = np.where(labels==i)[0] - if not(0 in idx): #because we don't want to include the cluster that includes alpha=0 - affinity_submatrix = affinity[idx][:, idx] - sum_affinities = np.sum(affinity_submatrix, axis=0) - exemplar_idx = idx[np.argmax(sum_affinities)] - best_alphas.append(alphas[exemplar_idx]) - return np.sort(best_alphas), alphas, affinity[0,:], labels - - """ - This method creates the affinity matrix of subspaces returned by contrastive pca - """ - def create_affinity_matrix(self, max_log_alpha, n_alphas): - from math import pi - alphas = np.concatenate(([0],np.logspace(-1,max_log_alpha,n_alphas))) - subspaces = list() - k = len(alphas) - affinity = 0.5*np.identity(k) #it gets doubled - for alpha in alphas: - space = self.cpca_alpha(dataset=self.fg, alpha=alpha) - q, r = np.linalg.qr(space) - subspaces.append(q) - for i in range(k): - for j in range(i+1,k): - q0 = subspaces[i] - q1 = subspaces[j] - u, s, v = np.linalg.svd(q0.T.dot(q1)) - affinity[i,j] = s[0]*s[1] - affinity = affinity + affinity.T - self.affinity_matrix = np.nan_to_num(affinity) - -class Kernel_CPCA(CPCA): - def __init__(self, n_components=2, standardize=True, verbose=False, kernel="linear", gamma=10): - self.kernel=kernel - self.gamma=gamma - super().__init__(n_components, standardize, verbose) - - def fit_transform(self, foreground, background, plot=False, gui=False, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): - self.fg = foreground - self.bg = background - self.n_fg, self.features_d = foreground.shape - self.n_bg, self.features_d_bg = background.shape - if (gui or plot): - print("The parameters gui and plot cannot be set to True in Kernel PCA. Will return transformed data as an array instead") - if not(alpha_selection=='manual'): - print("The alpha parameter must be set manually for Kernel PCA. Will be using value of alpha = 2") - alpha_value = 2 - return cpca_alpha(alpha_value) - - def fit(self, foreground, background, preprocess_with_pca_dim=None): - raise ValueError("For Kernel CPCA, the fit() function is not defined. Please use the fit_transform() function directly") - - def transform(self, dataset, alpha_selection='auto', n_alphas=40, max_log_alpha=3, n_alphas_to_return=4, plot=False, gui=False, active_labels = None, colors=None, legend=None, alpha_value=None, return_alphas=False): - raise ValueError("For Kernel CPCA, the transform() function is not defined. Please use the fit_transform() function directly") - - def cpca_alpha(self, alpha,degree=2,coef0=1): - N=self.n_fg + self.n_bg - Z=np.concatenate([self.fg,self.bg],axis=0) - - ## selecting the kernel and computing the kernel matrix - if self.kernel=='linear': - K=Z.dot(Z.T) - elif method=='poly': - K=(Z.dot(Z.T)+coef0)**degree - elif method=='rbf': - K=np.exp(-gamma*squareform(pdist(Z))**2) - - ## Centering the data - K=centering(K,n) - - ## Using Kernel PCA to do the same - K_til=np.zeros(K.shape) - K_til[0:n,:]=K[0:n,:]/n - K_til[n:,:]=-alpha*K[n:,:]/m - Sig,A=np.linalg.eig(K_til) - Sig=np.real(Sig) - Sig[np.absolute(Sig)<1e-6]=0 - idx_nonzero=Sig!=0 - Sig=Sig[idx_nonzero] - A=np.real(A[:,idx_nonzero]) - sort_idx=np.argsort(Sig) - Sig=Sig[sort_idx] - A=A[:,sort_idx] - # Normalization - A_norm=np.zeros(A.shape[1]) - for i in range(A.shape[1]): - A_norm[i]=np.sqrt(A[:,i].dot(K).dot(A[:,i]).clip(min=0)) - A[:,i]/=A_norm[i]+1e-15 - - Z_proj_kernel=K.dot(A[:,-2:]) - X_proj_kernel=Z_proj_kernel[0:n,:] - Y_proj_kernel=Z_proj_kernel[n:,:] - - return X_proj_kernel#,Y_proj_kernel,Sig[-2:],A[:,-2:] - - ## ancillary functions - def centering(K,n): - m=K.shape[0]-n - Kx=K[0:n,:][:,0:n] - Ky=K[n:,:][:,n:] - Kxy=K[0:n,:][:,n:] - Kyx=K[n:,:][:,0:n] - K_center=np.copy(K) - K_center[0:n,:][:,0:n]=Kx - np.ones([n,n]).dot(Kx)/n - Kx.dot(np.ones([n,n]))/n\ - +np.ones([n,n]).dot(Kx).dot(np.ones([n,n]))/n/n - K_center[n:,:][:,n:]=Ky - np.ones([m,m]).dot(Ky)/m - Ky.dot(np.ones([m,m]))/m\ - +np.ones([m,m]).dot(Ky).dot(np.ones([m,m]))/m/m - K_center[0:n,:][:,n:]=Kxy - np.ones([n,n]).dot(Kxy)/n - Kxy.dot(np.ones([m,m]))/m\ - +np.ones([n,n]).dot(Kxy).dot(np.ones([m,m]))/n/m - K_center[n:,:][:,0:n]=Kyx - np.ones([m,m]).dot(Kyx)/m - Kyx.dot(np.ones([n,n]))/n\ - +np.ones([m,m]).dot(Kyx).dot(np.ones([n,n]))/m/n - return K_center - -if __name__ == '__main__': - N = 401; D = 1001; gap=3 - # In B, all the data pts are from the same distribution, which has different variances in three subspaces. - B = np.zeros((N, D)) - B[:,0:10] = np.random.normal(0,10,(N,10)) - B[:,10:20] = np.random.normal(0,3,(N,10)) - B[:,20:30] = np.random.normal(0,1,(N,10)) - - - # In A there are four clusters. - A = np.zeros((N, D)) - A[:,0:10] = np.random.normal(0,10,(N,10)) - # group 1 - A[0:100, 10:20] = np.random.normal(0,1,(100,10)) - A[0:100, 20:30] = np.random.normal(0,1,(100,10)) - # group 2 - A[100:200, 10:20] = np.random.normal(0,1,(100,10)) - A[100:200, 20:30] = np.random.normal(gap,1,(100,10)) - # group 3 - A[200:300, 10:20] = np.random.normal(2*gap,1,(100,10)) - A[200:300, 20:30] = np.random.normal(0,1,(100,10)) - # group 4 - A[300:400, 10:20] = np.random.normal(2*gap,1,(100,10)) - A[300:400, 20:30] = np.random.normal(gap,1,(100,10)) - A_labels = [0]*100+[1]*100+[2]*100+[3]*100 - - cpca = CPCA(standardize=False) - cpca.fit_transform(A, B, plot=True, active_labels=A_labels) - - print(A.shape) - print(B.shape) From 5e68a2376894a294b4bdd07625b3c9ae4bd5ee52 Mon Sep 17 00:00:00 2001 From: jkobject Date: Mon, 29 Nov 2021 14:10:58 +0000 Subject: [PATCH 9/9] formatting --- contrastive/__init__.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/contrastive/__init__.py b/contrastive/__init__.py index 31ec250..755fd7c 100644 --- a/contrastive/__init__.py +++ b/contrastive/__init__.py @@ -301,20 +301,20 @@ def all_cpca(self, dataset, n_alphas, max_log_alpha): """ def cpca_alpha(self, dataset, alpha=1): if self.only_loadings: - if not self.low_memory: - pca = PCA(self.n_components, svd_solver="randomized", copy=False,) - else: - pca = IncrementalPCA(self.n_components, copy=False, batch_size=1000) - return pca.fit(self.fg_cov - alpha*self.bg_cov).components_ + if not self.low_memory: + pca = PCA(self.n_components, svd_solver="randomized", copy=False,) + else: + pca = IncrementalPCA(self.n_components, copy=False, batch_size=1000) + return pca.fit(self.fg_cov - alpha*self.bg_cov).components_ else: - w, v = LA.eigh(self.fg_cov - alpha*self.bg_cov, overwrite_a=True, check_finite=False, driver="evd") - eig_idx = np.argpartition(w, -self.n_components)[-self.n_components:] - eig_idx = eig_idx[np.argsort(-w[eig_idx])] - v_top = v[:,eig_idx] + w, v = LA.eigh(self.fg_cov - alpha*self.bg_cov, overwrite_a=True, check_finite=False, driver="evd") + eig_idx = np.argpartition(w, -self.n_components)[-self.n_components:] + eig_idx = eig_idx[np.argsort(-w[eig_idx])] + v_top = v[:,eig_idx] reduced_dataset = dataset.dot(v_top) for comp in range(self.n_components): - reduced_dataset[:, comp] = reduced_dataset[:, comp] * \ - np.sign(reduced_dataset[0, comp]) + reduced_dataset[:, comp] = reduced_dataset[:, comp] * \ + np.sign(reduced_dataset[0, comp]) return reduced_dataset """