diff --git a/danker.sh b/danker.sh
index d6f1e87..a90fe5b 100755
--- a/danker.sh
+++ b/danker.sh
@@ -24,15 +24,15 @@ if [ "$1" == "ALL" ]; then
filename='all.links'
for i in `./lib/getLanguages.sh`; do ./lib/createLinks.sh "$i" >> all-link-files.txt; done
for i in `cat all-link-files.txt`; do cat "$i" >> "$filename"; done
- sort --field-separator=$'\t' --key=1 --temporary-directory=. -no "$filename" "$filename"
+ sort -S 50% --field-separator=$'\t' --key=1 --temporary-directory=. -no "$filename" "$filename"
else
filename=`./lib/createLinks.sh "$1"`
fi
if [ "$2" == "BIGMEM" ]; then
./lib/dankerBigMem.py "$filename" $damping_factor $iterations $start_value | sed "s/\(.*\)/Q\1/" > "$filename".rank
else
- sort --field-separator=$'\t' --key=2 --temporary-directory=. -no "$filename"".right" "$filename"
+ sort -S 50% --field-separator=$'\t' --key=2 --temporary-directory=. -no "$filename"".right" "$filename"
./lib/danker.py "$filename" "$filename"".right" $damping_factor $iterations $start_value | sed "s/\(.*\)/Q\1/" > "$filename".rank
rm "$filename"".right"
fi
-sort -nro "$filename"".rank" --field-separator=$'\t' --key=2 "$filename"".rank"
+sort -S 50% -nro "$filename"".rank" --field-separator=$'\t' --key=2 "$filename"".rank"
diff --git a/lib/createLinks.sh b/lib/createLinks.sh
index 29c2768..74fc22d 100755
--- a/lib/createLinks.sh
+++ b/lib/createLinks.sh
@@ -22,7 +22,7 @@ rss="https://dumps.wikimedia.org/""$1""wiki/latest/"
###################### DOWNLOAD AND UNZIP
wget -q "$rss""$1""wiki-latest-pagelinks.sql.gz-rss.xml" "$rss""$1""wiki-latest-page_props.sql.gz-rss.xml" "$rss""$1""wiki-latest-page.sql.gz-rss.xml" "$rss""$1""wiki-latest-redirect.sql.gz-rss.xml"
-dump_date=`cat *.xml | sed -n "s#^ $download\(.*\)#\1#p" | sort -u | head -n 1`
+dump_date=`cat *.xml | sed -n "s#^ $download\(.*\)#\1#p" | sort -S 50% -u | head -n 1`
rm *.xml
pagelinks="$1""wiki-""$dump_date""-pagelinks.sql"
pageprops="$1""wiki-""$dump_date""-page_props.sql"
@@ -46,15 +46,15 @@ fi
rm "$1"*.sql
###################### JOINS
export LC_ALL=C
-sort --field-separator=$'\t' --key=2 -o "$1""page.lines" "$1""page.lines"
-sort --field-separator=$'\t' --key=2 -o "$1""pagelinks.lines" "$1""pagelinks.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""page.lines" "$1""page.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""pagelinks.lines" "$1""pagelinks.lines"
join -j 2 "$1""pagelinks.lines" "$1""page.lines" -o 1.1,2.1 -t $'\t' > "$1""pagelinks2.lines"
-sort --field-separator=$'\t' --key=2 -o "$1""pagelinks2.lines" "$1""pagelinks2.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""pagelinks2.lines" "$1""pagelinks2.lines"
# take care of redirects (note: 'double redirects' are fixed by bots --> https://en.wikipedia.org/wiki/Wikipedia:Double_redirects)
-sort --field-separator=$'\t' --key=2 -o "$1""redirects.lines" "$1""redirects.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""redirects.lines" "$1""redirects.lines"
join -j 2 "$1""redirects.lines" "$1""page.lines" -o 2.1,1.1 -t $'\t' > "$1""redirects2.lines"
-sort --field-separator=$'\t' --key=2 -o "$1""redirects2.lines" "$1""redirects2.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""redirects2.lines" "$1""redirects2.lines"
join -j 2 "$1""pagelinks2.lines" "$1""redirects2.lines" -o 1.1,2.1 -t $'\t' > "$1""pagelinks22.lines"
# we can write this back to our page links set (potentially duplicating links)
@@ -63,11 +63,11 @@ cat "$1""pagelinks22.lines" >> "$1""pagelinks2.lines"
# end redirects
###################### GET Q-IDs
-sort --field-separator=$'\t' --key=2 -o "$1""pagelinks2.lines" "$1""pagelinks2.lines"
-sort --field-separator=$'\t' --key=2 -o "$1""pageprops.lines" "$1""pageprops.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""pagelinks2.lines" "$1""pagelinks2.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""pageprops.lines" "$1""pageprops.lines"
join -j 2 "$1""pagelinks2.lines" "$1""pageprops.lines" -o 2.1,1.1 -t $'\t' > "$1""pagelinks.lines"
-sort --field-separator=$'\t' --key=2 -o "$1""pagelinks.lines" "$1""pagelinks.lines"
+sort -S 50% --field-separator=$'\t' --key=2 -o "$1""pagelinks.lines" "$1""pagelinks.lines"
join -j 2 "$1""pagelinks.lines" "$1""pageprops.lines" -o 2.1,1.1 -t $'\t' | sed "s/\(Q\|q\)\(.*\)\t\(Q\|q\)\(.*\)/\2\t\4/" > "$1""pagelinks2.lines"
-sort "$1""pagelinks2.lines" | uniq > "$1"-"$dump_date"".links"
+sort -S 50% -u "$1""pagelinks2.lines" > "$1"-"$dump_date"".links"
rm "$1"*.lines
echo "$1"-"$dump_date"".links"