From 707a2687e38fbe2a1915b1f1c5a807a31db96154 Mon Sep 17 00:00:00 2001 From: smlmbrt Date: Wed, 21 Feb 2024 11:59:19 +0000 Subject: [PATCH] Make sure it only reads SUM and provides the correct column names back Signed-off-by: smlmbrt --- pgscatalog_utils/ancestry/read.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pgscatalog_utils/ancestry/read.py b/pgscatalog_utils/ancestry/read.py index 2db224e..bb83273 100644 --- a/pgscatalog_utils/ancestry/read.py +++ b/pgscatalog_utils/ancestry/read.py @@ -75,9 +75,9 @@ def extract_ref_psam_cols( def read_pgs(loc_aggscore): """ - Function to read the output of aggreagte_scores + Function to read the PGS SUM from the output of aggregate_scores :param loc_aggscore: path to aggregated scores output - :return: + :return: df of PGS SUM values (one column per PGS) indexed by sampleset and IID """ logger.debug("Reading aggregated score data: {}".format(loc_aggscore)) df = pd.read_csv( @@ -86,8 +86,8 @@ def read_pgs(loc_aggscore): index_col=["sampleset", "IID"], converters={"IID": str}, header=0, - ).pivot(columns=["PGS"], values=["SUM", "AVG"]) - # join column levels ({PGS}_{VALUE}) - df.columns = [f"{j}_{i}" for i, j in df.columns] + ).pivot(columns=["PGS"], values=["SUM"]) + # flatten the (value, PGS) column pairs to the PGS name only + df.columns = [f"{j}" for i, j in df.columns] return df