From 42910d5942432e0b5b7110afdc80599324dabf63 Mon Sep 17 00:00:00 2001 From: Tao PR Date: Wed, 15 Feb 2017 01:12:36 +0700 Subject: [PATCH] build_knowledge : clear graph db before move on --- README.md | 5 +++-- build_knowledge.py | 1 + pylib/knowledge/graph.py | 8 ++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a76e379..69d59fa 100644 --- a/README.md +++ b/README.md @@ -56,10 +56,11 @@ Execute: ``` The script continuously and endlessly crawls the knowledge topic -from Wikipedia starting from `Jupiter` page. You may change +from Wikipedia starting from the seeding page. You may change the initial topic within the script to what best suits you. To stop the process, just terminate is fine. It won't leave -anything at dirty state. +anything in a dirty state so you can re-execute the script again +at any time. >**[NOTE]** The script keeps continuously crawling and downloading the related knowledge through link traveral. diff --git a/build_knowledge.py b/build_knowledge.py index 1286886..00d4086 100644 --- a/build_knowledge.py +++ b/build_knowledge.py @@ -74,6 +74,7 @@ def iter_topic(crawl_collection,start): # Initialise a knowledge database print(colored('Initialising knowledge graph database...','cyan')) kb = Knowledge('localhost','vor','root',args['root']) + kb.clear() # Load existing pos patterns print(colored('Loading POS patterns...','cyan')) diff --git a/pylib/knowledge/graph.py b/pylib/knowledge/graph.py index b9dcbbd..301765d 100644 --- a/pylib/knowledge/graph.py +++ b/pylib/knowledge/graph.py @@ -50,6 +50,14 @@ def __prepare_classes(self): print(colored(e,'yellow')) + """ + Permanently remove all edges and vertices + """ + def clear(self): + self.orient.command('delete vertex TOPIC') + self.orient.command('delete vertex KEYWORD') + self.orient.command('delete edge') # May this be redundant? + print(colored('[Graph clearance] done','yellow')) """ Add a set of new knowledge links