-
Notifications
You must be signed in to change notification settings - Fork 0
/
testChunker.php
91 lines (80 loc) · 2.47 KB
/
testChunker.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
<html>
<head>
<title>Chunker</title>
<link type="text/css" href="style.css" rel="stylesheet">
</head>
<body>
<b>Input:</b><br/>
<form action="testChunker.php">
<?php
// Re-populate the textarea with the previously submitted text.
// The value is user-controlled, so it is HTML-escaped before being echoed;
// otherwise input such as "</textarea><script>..." would be injected into the page.
// Non-string values (e.g. ?parsedText[]=x) are ignored instead of printing "Array".
$submittedText = (isset($_GET["parsedText"]) && is_string($_GET["parsedText"])) ? $_GET["parsedText"] : '';
?>
<textarea name="parsedText"><?php echo htmlspecialchars($submittedText, ENT_QUOTES, 'UTF-8'); ?></textarea>
<br/>
<input type="submit" value="Chunk!"/>
</form>
<?php
/*
 * Chunker test page logic.
 *
 * Reads the bracketed parse tree submitted in the "parsedText" query
 * parameter, builds a Node tree from it, asks the tree for chunks and
 * prints the chunks followed by the tree itself.
 */
$rawInput = isset($_GET['parsedText']) ? $_GET['parsedText'] : '';

// Only plain, non-empty strings are processed; array-style parameters
// (parsedText[]=...) would otherwise break the string functions below.
if (is_string($rawInput) && $rawInput !== '') {
    // Presumably provides the Node class used below; require_once makes a
    // missing file fail here instead of at the first "new Node".
    require_once 'chunkParseTree.php';

    // Remove line breaks. Browsers submit textarea line breaks as CRLF, so
    // "\r" must go as well: a token like ")\r" would not be recognised as a
    // closing bracket and the tree would be nested incorrectly.
    $parsed = str_replace(array("\r", "\n"), "", $rawInput);

    // Drop the first two and the last two characters.
    // NOTE(review): presumably this strips the outermost wrapper of the
    // parser output - confirm against the expected input format.
    $parsed = (string) substr($parsed, 2);
    $parsed = (string) substr($parsed, 0, -2);

    // Put a space between adjacent brackets so every bracket ends up in its
    // own token. Two passes are needed because one pass leaves "((" behind
    // in runs of three or more brackets.
    $parsed = str_replace("((", "( (", $parsed);
    $parsed = str_replace("((", "( (", $parsed);
    $parsed = str_replace("))", ") )", $parsed);
    $parsed = str_replace("))", ") )", $parsed);

    $rootNode = null;
    $currentNode = null;

    foreach (explode(" ", $parsed) as $token) {
        if ($token === '') {
            // Consecutive spaces produce empty tokens; they carry no information.
            continue;
        }

        if (strcmp(substr($token, 0, 1), "(") == 0) {
            // Got a new phrase - make a new leaf
            $tokenCategory = trim(substr($token, 1));
            if ($rootNode === null) {
                $rootNode = new Node($tokenCategory);
                $currentNode = $rootNode;
                $currentNode->level = 0;
            } else {
                $newNode = new Node($tokenCategory);
                $newNode->setParent($currentNode);
                $newNode->level = $currentNode->level + 1;
                if (!$rootNode->hasChildren()) {
                    $rootNode->addChild($newNode);
                } else {
                    $currentNode->addChild($newNode);
                }
                $currentNode = $newNode;
            }
        } elseif (strcmp(substr($token, -1, 1), ")") == 0) {
            // Phrase ended.
            if ($currentNode === null) {
                // Closing bracket before any phrase was opened: nothing to close.
                continue;
            }
            // If it contained a word, add it to the current leaf, then
            // switch back to the parent (the root has none and stays current).
            $tokenWord = substr($token, 0, -1);
            if (strlen($tokenWord) > 0) {
                $currentNode->setWord($tokenWord);
            }
            $parentNode = $currentNode->getParent();
            if ($parentNode != null) {
                $currentNode = $parentNode;
            }
        }
    }

    if ($rootNode === null) {
        // No "(" token was found, so there is no tree to chunk or draw.
        echo "<b>Chunks:</b><br/>No parse tree could be read from the input.<br/>";
    } else {
        $wordCount = str_word_count($rootNode->traverse('inorder', ''));

        // Start with a quarter of the words per chunk. str_word_count() skips
        // purely numeric tokens, so the count can be 0; the lower bound of 1
        // keeps the size growing in the loop below (0 * 1.5 would stay 0).
        $chunkSize = max(1.0, ceil($wordCount / 4));

        $finalChunks = array();
        $rootNode->getChunksToSize($rootNode, $chunkSize, $finalChunks);

        // Too many chunks: reset and retry with a 50% larger chunk size.
        while (count($finalChunks) > 10) {
            $finalChunks = array();
            $rootNode->clearInnerChunks($rootNode);
            $chunkSize = $chunkSize * 1.5;
            $rootNode->getChunksToSize($rootNode, $chunkSize, $finalChunks);
        }
        $finalChunks = array_reverse($finalChunks);

        // Print chunks
        echo "<b>Chunks:</b><br/><div class='finalChunks'><ul>";
        foreach ($finalChunks as $finalChunk) {
            // Chunk text originates from user input, so escape it for HTML.
            // NOTE(review): assumes chunks are plain text; if getChunksToSize
            // deliberately emits markup, this escaping must be reconsidered.
            echo "<li>" . htmlspecialchars((string) $finalChunk, ENT_QUOTES, 'UTF-8') . "</li>";
        }
        echo "</ul></div>";
        echo "<br style='clear:both;'/><br style='clear:both;'/>";
        echo "<b>Tree:</b><br/>";

        // Draw a pretty tree :)
        // NOTE(review): printTree writes its own output; whether it escapes the
        // user-supplied words should be checked in chunkParseTree.php.
        $rootNode->printTree($rootNode);
    }
}
?>
<br/>
<br/>
</body>
</html>