Skip to content

Commit

Permalink
fix numeric IIDs
Browse files Browse the repository at this point in the history
  • Loading branch information
nebfield committed Nov 29, 2023
1 parent d626b60 commit 14b93a1
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions pgscatalog_utils/ancestry/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def read_pcs(loc_pcs: list[str],dataset: str, loc_related_ids=None, nPCs=None):

for i, path in enumerate(loc_pcs):
logger.debug("Reading PCA projection: {}".format(path))
df = pd.read_csv(path, sep='\t')
df = pd.read_csv(path, sep='\t', converters={"IID": str})
df['sampleset'] = dataset
df.set_index(['sampleset', 'IID'], inplace=True)

Expand Down Expand Up @@ -46,7 +46,10 @@ def read_pcs(loc_pcs: list[str],dataset: str, loc_related_ids=None, nPCs=None):
IDs_related = [x.strip() for x in infile.readlines()]
proj.loc[proj.index.get_level_values(level=1).isin(IDs_related), 'Unrelated'] = False
else:
proj['Unrelated'] = np.nan
# if unrelated is all nan -> dtype is float64
# if unrelated is only true / false -> dtype is bool
# if unrelated contains None -> dtype is object
proj['Unrelated'] = None

return proj

Expand Down Expand Up @@ -76,7 +79,7 @@ def read_pgs(loc_aggscore, onlySUM: bool):
:return:
"""
logger.debug('Reading aggregated score data: {}'.format(loc_aggscore))
df = pd.read_csv(loc_aggscore, sep='\t', index_col=['sampleset', 'IID'])
df = pd.read_csv(loc_aggscore, sep='\t', index_col=['sampleset', 'IID'], converters={"IID": str})
if onlySUM:
df = df[[x for x in df.columns if x.endswith('_SUM')]]
rn = [x.rstrip('_SUM') for x in df.columns]
Expand Down

0 comments on commit 14b93a1

Please sign in to comment.