From 24c7d333a136ccb73a6cecb1efcf28e3449a09dd Mon Sep 17 00:00:00 2001
From: Sanhe <MacHu-GWU@users.noreply.github.com>
Date: Tue, 2 Jan 2024 23:03:01 -0500
Subject: [PATCH] add type_is_ngram_words

---
 afwf_fts_anything/dataset.py        | 19 +++-----
 afwf_fts_anything/handlers/fts.py   |  4 +-
 afwf_fts_anything/setting.py        | 17 ++++++-
 bin/automation/config.py            |  2 +-
 docs/user-guide/01-How-it-Works.rst |  1 +
 info.plist                          | 76 ++++++++++++++++++++++++-----
 6 files changed, 89 insertions(+), 30 deletions(-)

diff --git a/afwf_fts_anything/dataset.py b/afwf_fts_anything/dataset.py
index 83ecdbd..839f957 100644
--- a/afwf_fts_anything/dataset.py
+++ b/afwf_fts_anything/dataset.py
@@ -35,20 +35,13 @@ class Dataset(AttrsClass):
         the folder is automatically generated based on your setting and data.
     - ``${name}-icon``: the icon directory, which contains the icon for Alfred.
     """
-
+    # fmt: off
     name: str = AttrsClass.ib_str()
-    path_setting: T.Optional[Path] = AttrsClass.ib_generic(
-        type_=Path, nullable=True, default=None
-    )
-    path_data: T.Optional[Path] = AttrsClass.ib_generic(
-        type_=Path, nullable=True, default=None
-    )
-    dir_index: T.Optional[Path] = AttrsClass.ib_generic(
-        type_=Path, nullable=True, default=None
-    )
-    dir_icon: T.Optional[Path] = AttrsClass.ib_generic(
-        type_=Path, nullable=True, default=None
-    )
+    path_setting: T.Optional[Path] = AttrsClass.ib_generic(type_=Path, nullable=True, default=None)
+    path_data: T.Optional[Path] = AttrsClass.ib_generic(type_=Path, nullable=True, default=None)
+    dir_index: T.Optional[Path] = AttrsClass.ib_generic(type_=Path, nullable=True, default=None)
+    dir_icon: T.Optional[Path] = AttrsClass.ib_generic(type_=Path, nullable=True, default=None)
+    # fmt: on
 
     @property
     def _path_setting(self) -> Path:
diff --git a/afwf_fts_anything/handlers/fts.py b/afwf_fts_anything/handlers/fts.py
index b91a780..8fd6685 100644
--- a/afwf_fts_anything/handlers/fts.py
+++ b/afwf_fts_anything/handlers/fts.py
@@ -72,12 +72,14 @@ def main(
         doc_list = dataset.search(query_str)
         setting = dataset.setting
         for doc in doc_list:
+            arg = setting.format_arg(doc)
             item = afwf.Item(
                 title=setting.format_title(doc),
                 subtitle=setting.format_subtitle(doc),
-                arg=setting.format_arg(doc),
+                arg=arg,
                 autocomplete=setting.format_autocomplete(doc),
             )
+            item.open_url(url=arg)
             icon = setting.format_icon(doc)
             if icon is not None:
                 # use absolute path
diff --git a/afwf_fts_anything/setting.py b/afwf_fts_anything/setting.py
index 571b047..ecbc76a 100644
--- a/afwf_fts_anything/setting.py
+++ b/afwf_fts_anything/setting.py
@@ -34,6 +34,9 @@ class Field(AttrsClass):
     :param type_is_ngram: if True, the value is index using ngram. It matches
         any character shorter than N characters.
         https://whoosh.readthedocs.io/en/latest/ngrams.html.
+    :param type_is_ngram_words: similar to type_is_ngram, but it tokenizes
+        text into words before indexing. It matches any character shorter than N characters.
+        https://whoosh.readthedocs.io/en/latest/api/fields.html#whoosh.fields.NGRAMWORDS.
     :param type_is_phrase: if True, the value is indexed using phrase. Only
         case-insensitive phrase will be matched. See
         https://whoosh.readthedocs.io/en/latest/schema.html#built-in-field-types
@@ -58,6 +61,7 @@ class Field(AttrsClass):
     name: str = attr.ib()
     type_is_store: bool = attr.ib(default=False)
     type_is_ngram: bool = attr.ib(default=False)
+    type_is_ngram_words: bool = attr.ib(default=False)
     type_is_phrase: bool = attr.ib(default=False)
     type_is_keyword: bool = attr.ib(default=False)
     type_is_numeric: bool = attr.ib(default=False)
@@ -74,6 +78,7 @@ def __attrs_post_init__(self):
         flag = sum(
             [
                 self.type_is_ngram,
+                self.type_is_ngram_words,
                 self.type_is_phrase,
                 self.type_is_keyword,
                 self.type_is_numeric,
@@ -84,7 +89,7 @@ def __attrs_post_init__(self):
         else:
             msg = (
                 f"you have to specify one and only one index type for column {self.name!r}, "
-                f"valid types are: ngram, phrase, keyword, numeric."
+                f"valid types are: ngram, ngram_words, phrase, keyword, numeric."
             )
             raise MalformedSettingError(msg)
 
@@ -192,7 +197,7 @@ def store_fields(self) -> T.List[str]:
 
     @cached_property
     def ngram_fields(self) -> T.List[str]:
-        return [field.name for field in self.fields if field.type_is_ngram]
+        return [field.name for field in self.fields if field.type_is_ngram or field.type_is_ngram_words]
 
     @cached_property
     def phrase_fields(self) -> T.List[str]:
@@ -249,6 +254,14 @@ def create_whoosh_schema(self) -> whoosh.fields.Schema:
                     field_boost=field.weight,
                     sortable=field.is_sortable,
                 )
+            elif field.type_is_ngram_words:
+                whoosh_field = whoosh.fields.NGRAMWORDS(
+                    stored=field.type_is_store,
+                    minsize=field.ngram_minsize,
+                    maxsize=field.ngram_maxsize,
+                    field_boost=field.weight,
+                    sortable=field.is_sortable,
+                )
             elif field.type_is_phrase:
                 whoosh_field = whoosh.fields.TEXT(
                     stored=field.type_is_store,
diff --git a/bin/automation/config.py b/bin/automation/config.py
index f7d29d7..ee61c7d 100644
--- a/bin/automation/config.py
+++ b/bin/automation/config.py
@@ -17,6 +17,6 @@ class AutomationConfig:
 config = AutomationConfig(
     python_version="3.8",
     dir_workflow=Path(
-        "/Users/sanhehu/Documents/Alfred-Preferences/Alfred.alfredpreferences/workflows/user.workflow.A20183AE-E273-479C-8C30-6016DD77C018"
+        "/Users/sanhehu/Documents/Alfred-Setting/Alfred.alfredpreferences/workflows/user.workflow.029AD850-D41F-4B53-B495-35061A408298",
     ),
 )
diff --git a/docs/user-guide/01-How-it-Works.rst b/docs/user-guide/01-How-it-Works.rst
index e3581bc..90f754c 100644
--- a/docs/user-guide/01-How-it-Works.rst
+++ b/docs/user-guide/01-How-it-Works.rst
@@ -63,6 +63,7 @@ Field is the basic unit of search. You can define how you want the data to be ma
 :name <str>: the name of the field
 :type_is_store <bool>: if True, the value is only stored but not indexed for search. Usually it can be used to dynamically construct value for argument (the action when you press enter), or for auto complete (the action when you press tab)
 :type_is_ngram <bool>: if True, the value is index using ngram. It matches any character shorter than N characters. https://whoosh.readthedocs.io/en/latest/ngrams.html.
+:type_is_ngram_words <bool>: similar to type_is_ngram, but it tokenizes text into words before indexing. It matches any character shorter than N characters. https://whoosh.readthedocs.io/en/latest/api/fields.html#whoosh.fields.NGRAMWORDS.
 :type_is_phrase <bool>: if True, the value is indexed using phrase. Only case-insensitive phrase will be matched. See https://whoosh.readthedocs.io/en/latest/schema.html#built-in-field-types
 :type_is_keyword <bool>: if True, the value is indexed using keyword. The keyword has to be exactly matched. See https://whoosh.readthedocs.io/en/latest/schema.html#built-in-field-types
 :type_is_numeric: if True, the value is indexed using number. The number field is not used for searching, it is only used for sorting. See https://whoosh.readthedocs.io/en/latest/schema.html#built-in-field-types
diff --git a/info.plist b/info.plist
index 3a71bfa..14feb06 100644
--- a/info.plist
+++ b/info.plist
@@ -4,8 +4,6 @@
 <dict>
 	<key>bundleid</key>
 	<string>MacHu-GWU.afwf_fts_anything</string>
-	<key>category</key>
-	<string>Productivity</string>
 	<key>connections</key>
 	<dict>
 		<key>27D78147-51C3-4E72-B805-0E11B1F3A6D8</key>
@@ -47,7 +45,7 @@
 			</dict>
 			<dict>
 				<key>destinationuid</key>
-				<string>B86DCC60-B7BA-4FCB-A766-0282D4F57AEF</string>
+				<string>EA372964-D319-44BA-AA38-CFB7EAEEF180</string>
 				<key>modifiers</key>
 				<integer>0</integer>
 				<key>modifiersubtext</key>
@@ -71,6 +69,21 @@
 				<false/>
 			</dict>
 		</array>
+		<key>EA372964-D319-44BA-AA38-CFB7EAEEF180</key>
+		<array>
+			<dict>
+				<key>destinationuid</key>
+				<string>B86DCC60-B7BA-4FCB-A766-0282D4F57AEF</string>
+				<key>modifiers</key>
+				<integer>0</integer>
+				<key>modifiersubtext</key>
+				<string></string>
+				<key>sourceoutputuid</key>
+				<string>5EA742F9-1BBA-4433-8613-BFBFF0D0E462</string>
+				<key>vitoclose</key>
+				<false/>
+			</dict>
+		</array>
 	</dict>
 	<key>createdby</key>
 	<string>Sanhe Hu</string>
@@ -227,7 +240,7 @@
 				<key>spaces</key>
 				<string></string>
 				<key>url</key>
-				<string>{query}</string>
+				<string>{var:open_url_arg}</string>
 				<key>utf8</key>
 				<true/>
 			</dict>
@@ -238,6 +251,36 @@
 			<key>version</key>
 			<integer>1</integer>
 		</dict>
+		<dict>
+			<key>config</key>
+			<dict>
+				<key>conditions</key>
+				<array>
+					<dict>
+						<key>inputstring</key>
+						<string>{var:open_url}</string>
+						<key>matchcasesensitive</key>
+						<false/>
+						<key>matchmode</key>
+						<integer>0</integer>
+						<key>matchstring</key>
+						<string>y</string>
+						<key>outputlabel</key>
+						<string></string>
+						<key>uid</key>
+						<string>5EA742F9-1BBA-4433-8613-BFBFF0D0E462</string>
+					</dict>
+				</array>
+				<key>elselabel</key>
+				<string>else</string>
+			</dict>
+			<key>type</key>
+			<string>alfred.workflow.utility.conditional</string>
+			<key>uid</key>
+			<string>EA372964-D319-44BA-AA38-CFB7EAEEF180</string>
+			<key>version</key>
+			<integer>1</integer>
+		</dict>
 	</array>
 	<key>readme</key>
 	<string>afwf_fts_anything is an Alfred Workflow allows you to do full-text search on your own dataset, and use the result to open url, open file, run script, or basically do anything. Typically, you need to setup expansive elasticsearch server, learn how to do data ingestion, learn search API, and build your own Alfred workflow. afwf_fts_anything removes all the blockers and let you just focus on your dataset and search configuration.
@@ -250,42 +293,49 @@ See more at: https://github.com/MacHu-GWU/afwf_fts_anything-project</string>
 			<key>xpos</key>
 			<integer>740</integer>
 			<key>ypos</key>
-			<integer>150</integer>
+			<integer>115</integer>
 		</dict>
 		<key>0954010F-CD86-448F-A32A-086CF90100D2</key>
 		<dict>
 			<key>xpos</key>
 			<integer>740</integer>
 			<key>ypos</key>
-			<integer>45</integer>
+			<integer>10</integer>
 		</dict>
 		<key>27D78147-51C3-4E72-B805-0E11B1F3A6D8</key>
 		<dict>
 			<key>xpos</key>
-			<integer>585</integer>
+			<integer>545</integer>
 			<key>ypos</key>
-			<integer>65</integer>
+			<integer>35</integer>
 		</dict>
 		<key>55DEB335-2FC6-4FF9-8482-F949B27BFEE9</key>
 		<dict>
 			<key>xpos</key>
-			<integer>30</integer>
+			<integer>35</integer>
 			<key>ypos</key>
-			<integer>45</integer>
+			<integer>10</integer>
 		</dict>
 		<key>92ADA757-06DC-428F-AA48-7C16FD4BE487</key>
 		<dict>
 			<key>xpos</key>
-			<integer>585</integer>
+			<integer>545</integer>
 			<key>ypos</key>
-			<integer>170</integer>
+			<integer>140</integer>
 		</dict>
 		<key>B86DCC60-B7BA-4FCB-A766-0282D4F57AEF</key>
 		<dict>
 			<key>xpos</key>
 			<integer>740</integer>
 			<key>ypos</key>
-			<integer>255</integer>
+			<integer>220</integer>
+		</dict>
+		<key>EA372964-D319-44BA-AA38-CFB7EAEEF180</key>
+		<dict>
+			<key>xpos</key>
+			<integer>545</integer>
+			<key>ypos</key>
+			<integer>245</integer>
 		</dict>
 	</dict>
 	<key>variablesdontexport</key>