diff --git a/README.md b/README.md index 4b7439f..4e4d36a 100644 --- a/README.md +++ b/README.md @@ -76,29 +76,41 @@ OR: query source | tuxi [options] Options: -h Show this help message and exit. -v Print tuxi version info and exit. + -r Raw search results. (no pretty output, no colors) + -q Only output search results. (silences "Did you mean?", greeting, usage) + -a Prints all valid answers. + + -u Prints out the top handful of URLs for your search query + (this is automatically printed out if tuxi can't find you an answer) + -b Tries to select the best answer based on keywords at the start and end of your query. (experimental - eg: define WORD, SONG lyrics, PERSON quotes, weather CITY, FILM cast) + -t Pick answers to test. (you can specify multiple answers using tuxi_NAME in your query) + -l use LANG_[lang] in your query to override the language used (eg: tuxi -l LANG_en_US my search query) tuxi supports the following environment variables: TUXI_LANG=[lang] sets default search language (eg: TUXI_LANG='en_US') + TUXI_DELAY=[int] if you find more than one answer is being printed (and you're not using -a) increase this number by a little (you want it to be as low as possible) - default value is 100 (eg: TUXI_DELAY=120) + default value is 250 (eg: TUXI_DELAY=270) developer flags: -d prints debug info along with results - -s saves HTML for this query to /home/magic/.cache/tuxi/tuxi-[date]-[query].html + -s saves HTML for this query to $XDG_CACHE_HOME/tuxi/[date]-[query].html + -c use most recent cached result and query this can be combined with -t flag to more quickly test for different answers + -p disable pipe support (it can break some scripts including our own test script) Report bugs at https://github.com/Bugswriter/tuxi/issues diff --git a/tests/testqueries.txt b/tests/testqueries.txt index 31f18ca..ff4b508 100644 --- a/tests/testqueries.txt +++ b/tests/testqueries.txt @@ -18,7 +18,6 @@ # currency # trans # sport_fixture -# rich # kno_right did_you_mean Linux 
Tarvalds @@ -30,19 +29,19 @@ math 72^43/12(44+1)-3124 math (40/3)+4*6 math 45/sqrt(4)*3 -basic/kno_val christmas day +kno_val christmas day basic/kno_val summer solstice basic/kno_val shortest day of the year -kno_top the office cast -kno_top legends of runeterra -kno_top reddit -kno_top twitter +richcast/kno_top the office cast +kno_top/kno_right legends of runeterra +kno_top/kno_right reddit +kno_top/kno_right twitter -rich/kno_val elevation of mt everest -rich/kno_val what is elevation of mt rainier -rich/kno_val how big is the grand canyon -rich/kno_val how long is the yellow river? +kno_val elevation of mt everest +kno_val what is elevation of mt rainier +kno_val how big is the grand canyon +kno_val how long is the yellow river? feat the meaning of life the universe and everything else feat shotgun doom eternal @@ -79,7 +78,7 @@ weather how cold is it in canada right now? kno_right lorem ipsum kno_right the office -kno_right what is a firefighter +kno_right/define what is a firefighter kno_right what is the american bill of rights define define humor @@ -94,7 +93,6 @@ sport_fixture results of last chelsea match quotes linus torvalds quotes quotes quotes from gandhi -quotes inspirational quotes kno_val density of silver kno_val density of hydrogen @@ -102,5 +100,5 @@ kno_val what is the triple point of oxygen richcast cast of event horizon richcast cast of equalibrium -richcast GoT stars -richcast avengers actors +richcast/kno_top GoT stars +richcast/kno_top avengers actors diff --git a/tuxi b/tuxi index 6a4b937..0078bd5 100755 --- a/tuxi +++ b/tuxi @@ -1,18 +1,27 @@ #!/usr/bin/env sh +# tuxi is a cli assistant created by Bugswriter +# to get answers for your questions instantly. 
+# tuxi is currently developed by many collaborators +# you can get more information on our repo :) + +# https://github.com/Bugswriter/tuxi © GPL-3.0 License + ############################### ##### Constants ##### ############################### -# setting this overrides the system language variable -# this can also be set in your shell environment with TUXI_LANG= -# the -l commandline flag overrides everything +# setting this overrides getting the system language variable +# the -l commandline flag overrides everything +# this can also be set in your shell environment with +# TUXI_LANG= [ -n "$TUXI_LANG" ] && LANGUAGE="$TUXI_LANG" || LANGUAGE="" # if you find more than one answer is being printed (and you're not using -a) -# increase this number by a little (you want it to be as low as possible) -# this can also be set in your shell environment with TUXI_DELAY= -[ -n "$TUXI_DELAY" ] && MICRO_DELAY="$TUXI_DELAY" || MICRO_DELAY=150 +# increase this number by a little (you want it to be as low as possible) +# this can also be set in your shell environment with +# TUXI_DELAY= +[ -n "$TUXI_DELAY" ] && MICRO_DELAY="$TUXI_DELAY" || MICRO_DELAY=250 VERSION="dev 2.0" MAIN_PID="$$" @@ -23,46 +32,50 @@ MAIN_PID="$$" ######################################### # credit to @Zhann in #149 - -if [ "$OSTYPE" = 'darwin'* ]; then +# to use it, you will need to have GNU core utils installed +case "$OSTYPE" in +darwin*) sed() { gsed "$@" } paste() { gpaste "$@" } -fi + ;; +esac ###################################### ##### Snippet priority ##### ###################################### -# this variable determines the order the tests are started, they are processed in parallel -# even though these are started in order, by default, the first answer to resolve is the one printed -# the order here might only make a very small difference +# priority importance +# this variable determines the order the tests are started, they are processed in parallel +# even though these are started in order, 
by default, the first answer to resolve is the one printed +# the order here might only make a very small difference -# the first word should be the name of the a_function() followed by a space -# you can disable tests by commenting out the line(s) +# priority order +# the first word should be the name of the a_function() followed by a space +# you can disable tests by commenting out the line(s) priority=" +tracklist # Album track lists ( eg: noisia outer edges tracklist ) richcast # Rich Rich Answers ( eg: social network cast ) -basic # Basic Answers ( eg: christmas day ) -math # Math ( eg: log_2(3) * pi^e ) -feat # Featured Snippets ( eg: who is garfield ) -kno_val # Chem facts ( eg: density of silver, density of hydrogen, what is the triple point of oxygen ) -kno_top # Knowledge Graph - top ( list ) ( eg: the office cast ) -kno_right # Knowledge Graph - right ( eg: the office ) define # Define ( eg: define Aggrandize ) +lists # Simple lists ( eg Need for Speed Heat cars list ) +kno_val # Chem facts ( eg: density of silver, density of hydrogen, what is the triple point of oxygen ) pronounce # Learn to pronounce ( eg: pronounce linux ) +lyrics_int # Lyrics ( eg: gecgecgec lyrics ) weather # Weather ( eg: weather new york ) -lists # Simple lists ( eg Need for Speed Heat cars list ) +math # Math ( eg: log_2(3) * pi^e ) unit # Units Conversion ( eg: 1m into 1 cm ) currency # Currency Conversion ( eg: 1 USD in rupee ) +kno_top # Knowledge Graph - top ( list ) ( eg: the office cast ) +basic # Basic Answers ( eg: christmas day ) +feat # Featured Snippets ( eg: who is garfield ) +quotes # Quotes ( eg: mahatma gandhi quotes ) trans # Translate ( eg: Vais para cascais? em ingles ) sport_fixture # Shows last or next fixture of a sports team ( eg. 
Chelsea next game ) -quotes # Quotes ( eg: mahatma gandhi quotes ) -lyrics_int # Lyrics ( eg: gecgecgec lyrics ) lyrics_us # Lyrics for US users, above does not work for US -rich # Rich Answers ( eg: elevation of mount everest ) +kno_right # Knowledge Graph - right ( eg: the office ) " ############################## @@ -83,6 +96,7 @@ save_html=false use_cache=false pick_lang=false no_pipe=false +plus_urls=false # color codes N="\033[0m" # Reset @@ -104,29 +118,41 @@ help_text() { printf "%bOptions:%b\n" "$G" "$N" printf " -h Show this help message and exit.\n" printf " -v Print tuxi version info and exit.\n" + printf "\n" printf " -r Raw search results.\n" printf " (no pretty output, no colors)\n" + printf "\n" printf " -q Only output search results.\n" printf " (silences \"Did you mean?\", greeting, usage)\n" + printf "\n" printf " -a Prints all valid answers.\n" + printf "\n" + printf " -u Prints out the top handful of URLs for your search query\n" + printf " (this is automatically printed out if tuxi can't find you an answer)\n" + printf "\n" printf " -b Tries to select the best answer based on keywords at the start and end of your query.\n" printf " (experimental - eg: define WORD, SONG lyrics, PERSON quotes, weather CITY, FILM cast)\n" + printf "\n" printf " -t Pick answers to test.\n" printf " (you can specify multiple answers using tuxi_NAME in your query)\n" + printf "\n" printf " -l use LANG_[lang] in your query to override the language used\n" printf " (eg: tuxi -l LANG_en_US my search query)\n" printf "\n" printf "%btuxi supports the following environment variables:%b\n" "$G" "$N" printf " TUXI_LANG=[lang] sets default search language (eg: TUXI_LANG='en_US')\n" + printf "\n" printf " TUXI_DELAY=[int] if you find more than one answer is being printed (and you're not using -a)\n" printf " increase this number by a little (you want it to be as low as possible)\n" - printf " default value is 100 (eg: TUXI_DELAY=120)\n" + printf " default value is 250 (eg: 
TUXI_DELAY=270)\n" printf "\n" printf "%bdeveloper flags:%b\n" "$G" "$N" printf " -d prints debug info along with results\n" - printf " -s saves HTML for this query to $XDG_CACHE_HOME/tuxi/tuxi-[date]-[query].html\n" + printf " -s saves HTML for this query to $XDG_CACHE_HOME/tuxi/[date]-[query].html\n" + printf "\n" printf " -c use most recent cached result and query\n" printf " this can be combined with -t flag to more quickly test for different answers\n" + printf "\n" printf " -p disable pipe support (it can break some scripts including our own test script)\n" printf "\n" printf "%bReport bugs at%b %bhttps://github.com/Bugswriter/tuxi/issues%b\n" "$G" "$N" "$C" "$N" @@ -148,13 +174,14 @@ help_text() { # -s : save google HTML response # -c : use most recent cached results # -p : disable pipe support (needed for test script) -while getopts "rvhqabtldscp" OPT; do +# -u : also print out the top links +while getopts "rvhqabtldscpu" OPT; do case "$OPT" in r) raw=true ;; v) - printf "tuxi %s\n" "$version" + printf "tuxi %s\n" "$VERSION" exit 0 ;; h) @@ -188,18 +215,22 @@ while getopts "rvhqabtldscp" OPT; do p) no_pipe=true ;; + u) + plus_urls=true + ;; *) help_text | head -n 1 exit 1 ;; esac done + # shifts to query shift $((OPTIND - 1)) $pick_search && $best_match && echo "sorry but -b and -t mutually exclusive" && exit 1 -#//TODO this may need reworking later to use read instead and only capture the first line -# question | tuxi [-flags] --> answer :) +# TODO this may need reworking later to use read instead and only capture the first line +# question | tuxi [-flags] --> answer :) if ! 
$no_pipe; then [ -p /dev/stdin ] && query=$(cat) fi @@ -251,18 +282,20 @@ check_deps() { done } -# pup : https://github.com/ericchiang/pup -# recode : https://github.com/rrthomas/recode -# jq : https://github.com/stedolan/jq +# Dependencies +# pup : https://github.com/ericchiang/pup +# recode : https://github.com/rrthomas/recode +# jq : https://github.com/stedolan/jq check_deps "pup" "recode" "jq" ######################################## ##### Query manipulation ##### ######################################## -# If query is empty and -c is passed: use query from cached result -# If query is empty (no -c): exit -# If quiet=false: Prints greeting and usage +# Conditions to Query +# If query is empty and -c is passed: use query from cached result +# If query is empty (no -c): exit +# If quiet=false: Prints greeting and usage if [ -z "$1" ] && [ -z "$query" ]; then if ! $use_cache; then if ! $quiet; then @@ -279,8 +312,8 @@ fi [ -z "$query" ] && query="$*" # language select: the -l flag -# language specified on the command line overwrites both -# the variable set at the top of this script and the system language +# language specified on the command line overwrites both +# the variable set at the top of this script and the system language if $pick_lang; then query="$(printf '%b\n' "$query" | sed 's/ /\\n/g')" LANGUAGE="$(printf '%b\n' "$query" | grep 'LANG_' | sed 's/LANG_//g')" @@ -288,8 +321,8 @@ if $pick_lang; then fi # Custom answers: the -t flag -# clears the list of snippets to check (saving the original list to print out if a mistake is made) -# then loops through the query looking for tuxi_ and updates the priority variable to use only those snippets +# clears the list of snippets to check (saving the original list to print out if a mistake is made) +# then loops through the query looking for tuxi_ and updates the priority variable to use only those snippets if $pick_search; then list_priority="$priority" snippet_check=$(printf '%b\n' "$list_priority" | cut -d ' 
' -f1 | sed -e '/^\s*#.*$/d' -e '/^\s*$/d') @@ -318,7 +351,7 @@ fi # our patented (honest!) "smrt search" algorithm: the -b flag # jokes aside, this is going to need some iterating on, I'll turn it into a tidy loop later if $best_match; then - j=7 + j=8 use_quotes=false use_lyrics=false use_weather=false @@ -327,6 +360,7 @@ if $best_match; then use_define=false use_list=false use_pronounce=false + use_tracklist=false query_check="$(printf '%b\n' "$query" | sed 's/ /\\n/g' | tr '[:upper:]' '[:lower:]')" first_word=$(printf '%b\n' "$query_check" | head -n1) @@ -341,16 +375,18 @@ if $best_match; then define | definition) use_define=true ;; list) use_list=true ;; pronounce | pronunciation) use_pronounce=true ;; + tracklist | songs) use_tracklist=true ;; esac done $use_quotes && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'quotes')" || j=$(($j - 1)) $use_lyrics && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'lyrics')" || j=$(($j - 1)) $use_weather && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'weather')" || j=$(($j - 1)) - $use_cast && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep -e 'rich' -e 'lists' -e 'kno_')" || j=$(($j - 1)) + $use_cast && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep -e 'lists' -e 'kno_')" || j=$(($j - 1)) $use_define && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'define')" || j=$(($j - 1)) $use_pronounce && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'pronounce')" || j=$(($j - 1)) - $use_list && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep -e 'rich' -e 'lists' -e 'kno_')" || j=$(($j - 1)) + $use_tracklist && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep 'tracklist')" || j=$(($j - 1)) + $use_list && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | grep -e 'lists' -e 'kno_')" || j=$(($j - 1)) [ $j -eq 0 ] && priority="$(printf '%b\n' "$priority" | cut -d ' ' -f1 | sed -e '/^\s*#.*$/d' 
-e '/^[[:space:]]*$/d' | grep -v 'quotes' | grep -v 'lyrics' | grep -v 'weather')" fi @@ -358,53 +394,196 @@ fi ##### Answer functions ##### ###################################### -## FUNCTION TEMPLATE +# the following divs have been removed due to seemingly being redundant +# I'm leaving them here just in case they're needed again in future +# div.ujudUb (seems to be another lyrics scrape) + +# FUNCTION TEMPLATE + # NewAnswerName should be the word used in $priority # a_NewAnswerName() { # Answer description (and example) # echo "$google_html" | pup ... [ SCRAPE METHOD HERE ] ... # } -## NOTE: the order of these functions doesn't matter, priority is determined by the variable -a_define() { # Define (eg: define Aggrandize) //credit @igaurab - echo "$google_html" | pup 'div.DgZBFd, div.vdBwhd, div[data-dobid="dfn"] text{}' | sed -e 's/^/* /' -e '1 s/^* //' | recode html..ISO-8859-1 +# NOTE: the order of these functions doesn't matter, priority is determined by the variable + +# Define (eg: define Aggrandize) //original snippet credit @igaurab +a_define() { + case "$LANGUAGE" in + en_*) dfn_use_new=true ;; + *) dfn_use_new=false ;; + esac + if $dfn_use_new; then + define="$(echo "$google_html" | pup 'div.VpH2eb.dZd3De.vmod text{}' | sed '/^[[:space:]]*$/d' | recode html..ISO-8859-1)" + if [ -n "$define" ]; then + printf 'pronounced: %b%s%b\n\n' "$C" "$(printf '%s\n' "$define" | grep -m1 -A1 -w '/' | tail -n1)" "$N" + dfn_top=true + dfn_end_top=false + dfn_start=false + dfn_append=false + dfn_marker=false + dfn_sim_op=false + dfn_skip=false + printf '%s\n' "$define" | while IFS= read -r dfn_foo; do + if $dfn_top; then + if $dfn_end_top; then + case "$dfn_foo" in + *"noun: " | *"verb: " | *"adjective: " | *"adverb: " | *"pronoun: " | *"preposition: " | *"conjunction: " | *"determiner: " | *"exclamation: ") + printf '%s ' "$dfn_foo" + dfn_append=true + ;; + *"noun:" | *"verb:" | *"adjective:" | *"adverb:" | *"pronoun:" | *"preposition:" | *"conjunction:" | *"determiner:" 
| *"exclamation:") + printf '%s ' "$dfn_foo" + dfn_append=true + ;; + *) printf '%b%s%b\n\t%b%s%b\n' "$Y" "$dfn_backup" "$N" "$B" "$dfn_foo" "$N" ;; + esac + dfn_top=false + else + case "$dfn_foo" in + noun | verb | adjective | adverb | pronoun | preposition | conjunction | determiner | exclamation) + dfn_backup="$dfn_foo" + dfn_end_top=true + ;; + *) continue ;; + esac + fi + elif $dfn_append; then + printf '%b%s%b\n' "$C" "$dfn_foo" "$N" + dfn_append=false + elif [ $dfn_foo -eq $dfn_foo ] 2>/dev/null; then + printf '\n' + $dfn_sim_op && dfn_sim_op=false + elif [ "$dfn_foo" = '/' ]; then + $dfn_skip && dfn_skip=false || dfn_skip=true + elif $dfn_skip; then + continue + elif [ "$dfn_foo" = '. ' ]; then + $dfn_sim_op && dfn_sim_op=false + elif [ "$dfn_foo" = '.' ]; then + printf '\n' + $dfn_sim_op && dfn_sim_op=false + elif $dfn_marker; then + [ "$dfn_foo" = 'Similar:' ] && dfn_hl="$G" || dfn_hl="$R" + printf '%b%s%b\n' "$dfn_hl" "$dfn_foo" "$N" + dfn_marker=false + dfn_sim_op=true + elif $dfn_start; then + case "$dfn_foo" in + informal) printf '(informal) ' ;; + British) printf '(British) ' ;; + rare) printf '(rare) ' ;; + *) + printf '%s ' "$dfn_foo" + dfn_append=true + dfn_start=false + ;; + esac + else + case "$dfn_foo" in + "; "*) + printf '%s ' "$dfn_foo" + dfn_append=true + ;; + *"noun: " | *"verb: " | *"adjective: " | *"adverb: " | *"pronoun: " | *"preposition: " | *"conjunction: " | *"determiner: " | *"exclamation: ") + printf '\n%s ' "$dfn_foo" + $dfn_sim_op && dfn_sim_op=false + dfn_append=true + ;; + " h ") + dfn_marker=true + ;; + noun | verb | adjective | adverb | pronoun | preposition | conjunction | determiner | exclamation) + printf '\n' + $dfn_sim_op && dfn_sim_op=false + dfn_start=true + ;; + *) + $dfn_sim_op && printf '\t%s\n' "$dfn_foo" \ + || printf '\t%b%s%b\n' "$B" "$dfn_foo" "$N" + ;; + esac + fi + done + fi + else + echo "$google_html" | pup 'div.DgZBFd, div.vdBwhd, div[data-dobid="dfn"] text{}' | sed -e 's/^/* /' -e '1 s/^* //' | recode 
html..ISO-8859-1 + fi } -a_kno_val() { # Chem facts ( eg: density of silver, density of hydrogen, what is the triple point of oxygen) - # "what is the " seems to be required for some things //credit @sudocanttype + +# this div is google's top line answer, works for simple dates, values etc +# eg: density of silver, what is the triple point of oxygen, elevation of mount everest, christmas day +# "what is the " seems to be required for some things //credit @sudocanttype +a_kno_val() { echo "$google_html" | pup 'div.Z0LcW.XcVN5d text{}' | tr '\n' ' ' } -a_math() { # Math ( eg: log_2(3) * pi^e ) //credit @BeyondMagic - echo "$google_html" | pup 'span.qv3Wpe text{}' | tr -d ' ' + +# Math ( eg: log_2(3) * pi^e ) //credit @BeyondMagic +a_math() { + echo "$google_html" | pup 'span.qv3Wpe text{}' | tr -d '\n ' | recode html..ISO-8859-1 } -a_kno_top() { # Knowledge Graph - top (list) ( eg: the office cast ) //credit @Bugswriter + +# Knowledge Graph - top (list) ( eg: the office cast ) //credit @Bugswriter +a_kno_top() { echo "$google_html" | pup 'div.dAassd json{}' | jq -r '.[] | .children | .[] | .text' | sed ':a;N;$!ba;s/\n/ /g;s/null/\n/g' | sed '1s/.*/* &/;2,$s/.*/*&/;$d' | recode html..ISO-8859-1 } -a_quotes() { # Quotes ( eg: mahatma gandhi quotes ) //credit @PoseidonCoder + +# Quotes ( eg: mahatma gandhi quotes ) //credit @PoseidonCoder +a_quotes() { echo "$google_html" | pup 'div.Qynugf text{}' | recode html..ISO-8859-1 } -a_basic() { # Basic Answers ( eg: tuxi christmas day ) // @Bugswriter + +# Basic Answers ( eg: summer solstice || easter ) // @Bugswriter +# this displays similar info to kno_val but uses a different div in the google results +a_basic() { echo "$google_html" | pup 'div.zCubwf text{}' | tr -d '\n' | recode html..ISO-8859-1 } -a_richcast() { # Rich Rich Answers ( eg: social network cast ) //credit @BeyondMagic + +# Rich Rich Answers ( eg: social network cast ) //credit @BeyondMagic +a_richcast() { echo "$google_html" | pup 'a.ct5Ked json{}' | jq -r 
'.[] | .title' | sed 's/^/* /' | recode html..ISO-8859-1 } -a_lists() { # Simple lists (eg Need for Speed Heat cars list) //credit @BeyondMagic - echo "$google_html" | pup 'li.TrT0Xe text{}' | sed -e 's/^ //' -e 's/^/* /' -e 's/\.$//' | recode html..ISO-8859-1 -} -a_rich() { # Rich Answers ( eg: elevation of mount everest ) //credit @d-shaun + @Bugswriter - rich=$(echo "$google_html" | pup 'div.ujudUb, div.mR2gOd, div.XcVN5d text{}' | sed 's/^ //' | recode html..ISO-8859-1) - [ "$(printf '%b\n' "$rich" | head -n1)" = 'View all' ] || printf '%b\n' "$rich" + +# Simple lists (eg: how to exit vim || how to update windows) //original snippet credit @BeyondMagic +a_lists() { + lists="$(echo "$google_html" | pup 'div.co8aDb.XcVN5d, li.TrT0Xe' | sed 's/^[[:blank:]]*//g' | recode html..ISO-8859-1)" + if [ -n "$lists" ]; then + lists_num=0 + printf '%s\n' "$lists" | while IFS= read -r lists_foo; do + case "$lists_foo" in + "