Skip to content

Commit

Permalink
pdf2txt conversion added
Browse files Browse the repository at this point in the history
  • Loading branch information
ShubhamMishra1611 committed Aug 8, 2023
1 parent ef658e1 commit 8c13263
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions pdf2txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from PyPDF2 import PdfReader

class PDFProcessor:
    """Extract text from individual pages of a PDF via PyPDF2's ``PdfReader``.

    The reader is constructed once in ``__init__`` and reused by every
    subsequent call; the page count is cached in ``num_pages``.
    """

    def __init__(self, pdf_path):
        """Create a ``PdfReader`` for *pdf_path* and record its page count."""
        self.pdf_path = pdf_path
        self.reader = PdfReader(self.pdf_path)
        self.num_pages = len(self.reader.pages)

    def get_text_from_page(self, page_number, min_chars=10):
        """Return the text extracted from the page at *page_number*.

        Args:
            page_number: Zero-based index of the page to read.
            min_chars: Minimum length the extracted text must have to be
                considered readable. Defaults to 10, the threshold that
                was previously hard-coded.

        Returns:
            The extracted text on success; the integer ``0`` (after
            printing a notice) when extraction yields ``None`` or fewer
            than *min_chars* characters; the string
            ``"Invalid page number"`` when *page_number* is outside
            ``0 <= page_number < num_pages``.
        """
        # Guard clause: reject out-of-range indices before touching the reader.
        if not 0 <= page_number < self.num_pages:
            return "Invalid page number"

        extracted_text = self.reader.pages[page_number].extract_text()

        # Identity test for None; the length check only runs on real text.
        if extracted_text is None or len(extracted_text) < min_chars:
            print("Either the pdf is too short or is not readable. Try again...")
            # Callers compare the result against 0 to detect this case.
            return 0

        return extracted_text

    def get_num_pages(self):
        """Return the number of pages counted when the PDF was opened."""
        return self.num_pages

# Example usage: print the text of the first page of a local PDF.
if __name__ == "__main__":
    # Raw string keeps the Windows backslash literal; "\R" is not a valid
    # escape sequence and newer Python versions warn about it.
    pdf_path = r"D:\Resume (2).pdf"
    # pdf_path = "C:/Users/HP/Desktop/Doc1.pdf"

    pdf_processor = PDFProcessor(pdf_path)

    page_number = 0
    page_text = pdf_processor.get_text_from_page(page_number)
    # get_text_from_page returns 0 when the page yielded no usable text.
    if page_text != 0:
        print(f"Text from page {page_number}:\n{page_text}")

0 comments on commit 8c13263

Please sign in to comment.