Skip to content

Commit

Permalink
removed setting temporary directory to current dir - managed via TMPDI…
Browse files Browse the repository at this point in the history
…R environment variable
  • Loading branch information
athalhammer committed Oct 17, 2024
1 parent 7a64f96 commit 16ecc45
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
20 changes: 10 additions & 10 deletions script/create_links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -195,19 +195,19 @@ export LC_ALL=C

# Prepare page table - needed to normalize pagelinks and redirects
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""page.lines" \
"$wiki""page.lines"

# Prepare pagelinks
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""pagelinks.lines" \
"$wiki""pagelinks.lines"

# Prepare linktarget
sort -k 1,1 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki"linktarget.lines \
"$wiki"linktarget.lines

Expand All @@ -228,7 +228,7 @@ join -j 2 \

# Prepare redirects
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""redirect.lines" \
"$wiki""redirect.lines"

Expand All @@ -243,12 +243,12 @@ join -j 2 \
# Take care of redirects. Note: 'double redirects' are fixed by bots
# (https://en.wikipedia.org/wiki/Wikipedia:Double_redirects).
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""pagelinks_norm.lines" \
"$wiki""pagelinks_norm.lines"

sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""redirect_norm.lines" \
"$wiki""redirect_norm.lines"

Expand All @@ -266,11 +266,11 @@ cat "$wiki""pagelinks_redirected.lines" >> "$wiki""pagelinks_norm.lines"

# Resolve internal IDs to Wikidata Q-IDs
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""pagelinks_norm.lines" \
"$wiki""pagelinks_norm.lines"
sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""pageprops.lines" \
"$wiki""pageprops.lines"
join -j 2 \
Expand All @@ -280,7 +280,7 @@ join -j 2 \
> "$wiki""pagelinks.lines"

sort -k 2,2 \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki""pagelinks.lines" \
"$wiki""pagelinks.lines"
join -j 2 \
Expand All @@ -292,7 +292,7 @@ join -j 2 \

# Sort final output, cleanup, and print filename
sort -k 1,1n -k 2,2n -u \
-S "$MEM_PERC" -T . \
-S "$MEM_PERC" \
-o "$wiki"-"$dump_date"".links" \
"$wiki-$dump_date"".links"

Expand Down
6 changes: 3 additions & 3 deletions script/dank.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ if [ "$1" == "ALL" ]; then
done

# merge
xargs sort -m -k 1,1n -T . -S "$MEM_PERC" -o "$filename" < "$filename.files.txt"
xargs sort -m -k 1,1n -S "$MEM_PERC" -o "$filename" < "$filename.files.txt"

# collect stats
xargs wc -l < "$filename.files.txt" | grep -v "total" | sed "s/^[[:space:]]\+//" >> "$filename.stats.txt"
Expand Down Expand Up @@ -112,12 +112,12 @@ if [ "$bigmem" ]; then
2>> "$filename.stats.txt" | sed "s/\(.*\)/Q\1/" \
> "$filename".rank
else
sort -k 2,2n -T . -S "$MEM_PERC" -o "$filename"".right" "$filename"
sort -k 2,2n -S "$MEM_PERC" -o "$filename"".right" "$filename"
python3 -m danker "$filename" -r "$filename"".right" "$damping" "$iterations" "$start_value" -i \
2>> "$filename.stats.txt" | sed "s/\(.*\)/Q\1/" \
> "$filename".rank
rm "$filename"".right"
fi
sort -k 2,2nr -T . -S "$MEM_PERC" -o "$filename"".rank" "$filename"".rank"
sort -k 2,2nr -S "$MEM_PERC" -o "$filename"".rank" "$filename"".rank"
wc -l "$filename"".rank" >> "$filename.stats.txt"
echo "$filename"

0 comments on commit 16ecc45

Please sign in to comment.