From 707a2687e38fbe2a1915b1f1c5a807a31db96154 Mon Sep 17 00:00:00 2001
From: smlmbrt <sam.a.lambert@gmail.com>
Date: Wed, 21 Feb 2024 11:59:19 +0000
Subject: [PATCH] Make sure it only reads SUM and provides the correct column
 names back

Signed-off-by: smlmbrt <sam.a.lambert@gmail.com>
---
 pgscatalog_utils/ancestry/read.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pgscatalog_utils/ancestry/read.py b/pgscatalog_utils/ancestry/read.py
index 2db224e..bb83273 100644
--- a/pgscatalog_utils/ancestry/read.py
+++ b/pgscatalog_utils/ancestry/read.py
@@ -75,9 +75,9 @@ def extract_ref_psam_cols(
 
 def read_pgs(loc_aggscore):
     """
-    Function to read the output of aggreagte_scores
+    Function to read the PGS SUM from the output of aggregate_scores
     :param loc_aggscore: path to aggregated scores output
-    :return:
+    :return: df with PGS SUM indexed by sampleset and IID
     """
     logger.debug("Reading aggregated score data: {}".format(loc_aggscore))
     df = pd.read_csv(
@@ -86,8 +86,8 @@ def read_pgs(loc_aggscore):
         index_col=["sampleset", "IID"],
         converters={"IID": str},
         header=0,
-    ).pivot(columns=["PGS"], values=["SUM", "AVG"])
-    # join column levels ({PGS}_{VALUE})
-    df.columns = [f"{j}_{i}" for i, j in df.columns]
+    ).pivot(columns=["PGS"], values=["SUM"])
+    # rename to PGS only
+    df.columns = [f"{j}" for i, j in df.columns]
 
     return df