Skip to content

Commit

Permalink
Enhance: scraping logic for papers from feeds.
Browse files Browse the repository at this point in the history
  • Loading branch information
GeoffreyChen777 committed Feb 26, 2024
1 parent d10ea33 commit 15de8a9
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 26 deletions.
9 changes: 9 additions & 0 deletions app/renderer/services/command-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ export class CommandService extends Eventable<{}> {
});
},
});

// Register the "scrape_preprints" command so a metadata scrape of preprint
// papers can be triggered on demand.
this.register({
  id: "scrape_preprints",
  description: "Scrape metadata for all preprint papers in the library.",
  priority: 99996,
  // The result of scrapePreprint() is deliberately discarded: the handler
  // only kicks the scrape off and returns undefined.
  handler: () => void paperService.scrapePreprint(),
});
}

@errorcatching("Failed to register command.", true, "CommandService")
Expand Down
12 changes: 4 additions & 8 deletions app/renderer/services/feed-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -461,19 +461,15 @@ export class FeedService extends Eventable<IFeedServiceState> {
"Feed"
);

const paperEntityDrafts = await this._scrapeService.scrape(
feedEntities.map((feedEntityDraft: IFeedEntityObject) => {
const paperEntityDrafts = feedEntities.map(
(feedEntityDraft: IFeedEntityObject) => {
const paperEntityDraft = new PaperEntity({}, true).fromFeed(
feedEntityDraft
);
// NOTE: we don't want to download the PDFs when adding to library.
paperEntityDraft.mainURL = "";
return {
type: "PaperEntity",
value: paperEntityDraft,
};
}),
["semanticscholar"]
return paperEntityDraft;
}
);

// NOTE: we deliberately do not download the PDFs when adding to the library.
Expand Down
52 changes: 34 additions & 18 deletions app/renderer/services/paper-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ export class PaperService extends Eventable<IPaperServiceState> {
this._schedulerService.createTask(
"paperServiceScrapePreprint",
() => {
this.scrapePreprint();
this._routineScrapePreprint();
},
7 * 86400,
undefined,
Expand Down Expand Up @@ -601,6 +601,38 @@ export class PaperService extends Eventable<IPaperServiceState> {
"PaperService"
)
async scrapePreprint() {
  // Bail out while the database core still reports the "dbInitializing" state.
  const dbIsInitializing = this._databaseCore.getState("dbInitializing");
  if (dbIsInitializing) {
    return;
  }

  this._logService.info(
    `Scraping metadata of preprint paper(s)...`,
    "",
    true,
    "PaperService"
  );

  // Select papers whose publication mentions arXiv or openreview, or whose
  // publication field is empty, sorted by "addTime" in "desc" order.
  const realm = await this._databaseCore.realm();
  const preprintFilter =
    '(publication contains[c] "arXiv") OR (publication contains[c] "openreview") OR publication == ""';
  const loadedPreprints = this._paperEntityRepository.load(
    realm,
    preprintFilter,
    "addTime",
    "desc"
  );

  // Hand fresh PaperEntity copies (not the loaded objects themselves) to scrape().
  const preprintDrafts = loadedPreprints.map(
    (loadedPreprint) => new PaperEntity(loadedPreprint)
  );
  await this.scrape(preprintDrafts);
}

/**
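* Routine scrape of preprint paper entities, run from the scheduler task.
* After scraping, records the run time in the `lastRematchTime` preference.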
*/
@processing(ProcessingKey.General)
@errorcatching(
"Failed to scrape metadata of preprints.",
true,
"PaperService"
)
async _routineScrapePreprint() {
if (this._databaseCore.getState("dbInitializing")) {
return;
}
Expand All @@ -612,23 +644,7 @@ export class PaperService extends Eventable<IPaperServiceState> {
) {
return;
}
this._logService.info(
`Scraping metadata of preprint paper(s)...`,
"",
true,
"PaperService"
);
const preprintPaperEntities = this._paperEntityRepository.load(
await this._databaseCore.realm(),
'(publication contains[c] "arXiv") OR (publication contains[c] "openreview") OR publication == ""',
"addTime",
"desc"
);
await this.scrape(
preprintPaperEntities.map((paperEntity) => {
return new PaperEntity(paperEntity);
})
);
await this.scrapePreprint();
this._preferenceService.set({
lastRematchTime: Math.round(Date.now() / 1000),
});
Expand Down

0 comments on commit 15de8a9

Please sign in to comment.