Skip to content

Commit

Permalink
Make sure it only reads SUM and provides the correct column names back
Browse files Browse the repository at this point in the history
Signed-off-by: smlmbrt <[email protected]>
  • Loading branch information
smlmbrt committed Feb 21, 2024
1 parent df52a69 commit 707a268
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions pgscatalog_utils/ancestry/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ def extract_ref_psam_cols(

def read_pgs(loc_aggscore):
"""
Function to read the output of aggregate_scores
Function to read the PGS SUM from the output of aggregate_scores
:param loc_aggscore: path to aggregated scores output
:return:
:return: df with PGS SUM indexed by sampleset and IID
"""
logger.debug("Reading aggregated score data: {}".format(loc_aggscore))
df = pd.read_csv(
Expand All @@ -86,8 +86,8 @@ def read_pgs(loc_aggscore):
index_col=["sampleset", "IID"],
converters={"IID": str},
header=0,
).pivot(columns=["PGS"], values=["SUM", "AVG"])
# join column levels ({PGS}_{VALUE})
df.columns = [f"{j}_{i}" for i, j in df.columns]
).pivot(columns=["PGS"], values=["SUM"])
# rename to PGS only
df.columns = [f"{j}" for i, j in df.columns]

return df

0 comments on commit 707a268

Please sign in to comment.