From 5c96222efad39f8dcfb0ac4e5a331da69551ea7a Mon Sep 17 00:00:00 2001 From: gitronald Date: Tue, 13 Feb 2024 01:50:10 -0800 Subject: [PATCH] update: serp data model --- WebSearcher/models.py | 4 +++- WebSearcher/searchers.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/WebSearcher/models.py b/WebSearcher/models.py index 8fa6979..a5517b6 100644 --- a/WebSearcher/models.py +++ b/WebSearcher/models.py @@ -16,8 +16,10 @@ class BaseSERP(BaseModel): loc: Optional[str] = None # Location if set, "Canonical Name" url: str # URL of SERP html: str # Raw HTML of SERP - headers: Dict[str, str] # HTTP headers + # headers: Dict[str, str] # HTTP headers timestamp: str # Timestamp of crawl response_code: int # HTTP response code + user_agent: str # User agent used for the crawl serp_id: str # Search Engine Results Page (SERP) ID crawl_id: str # Crawl ID for grouping SERPs + version: str # WebSearcher version diff --git a/WebSearcher/searchers.py b/WebSearcher/searchers.py index 42eba32..02f3568 100644 --- a/WebSearcher/searchers.py +++ b/WebSearcher/searchers.py @@ -203,11 +203,11 @@ def save_serp(self, save_dir: str = '', append_to: str = ""): url=self.url, html=self.html, response_code=self.response.status_code, + user_agent=self.headers['User-Agent'], timestamp=self.timestamp, serp_id=self.serp_id, crawl_id=self.crawl_id, version=self.version, - user_agent=self.headers['User-Agent'] ) utils.write_lines([serp.model_dump()], append_to)