-
Notifications
You must be signed in to change notification settings - Fork 0
/
moebooru-crawler.sh
executable file
·83 lines (71 loc) · 1.42 KB
/
moebooru-crawler.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env sh
# moebooru-crawler: print direct image links from a moebooru-style
# booru's post listing (scrapes file_url attributes from the XML API).

# Number of links to print; empty = current page only, '0' = all pages.
NUM=
# Target URL (required positional argument).  Initialized explicitly so
# a URL variable inherited from the environment cannot leak into the
# argument parsing below and make the first positional arg look duplicate.
URL=
USAGE=$(
	cat <<-END
Usage: moebooru-crawler URL [ -n NUM, --num=NUM ]
-n NUM, --num=NUM print NUM links of images,
or print all if NUM is '0'
END
)
# Print all arguments as one diagnostic line on stderr.
# printf instead of echo: some /bin/sh echos interpret backslashes.
error() { printf '%s\n' "$*" >&2; }
# Print usage on stderr and abort with status 2 (usage error).
_exit() {
	error "$USAGE"
	exit 2
}
# Parse the command line: exactly one positional URL plus an optional
# -n/--num flag, accepted both as "-n N" / "--num N" and "-n=N" / "--num=N".
while [ $# -gt 0 ]; do
	case "$1" in
	--num=* | -n=*)
		# Inline form: everything after the first '=' is the value.
		NUM=${1#*=}
		shift
		;;
	--num | -n)
		# Flag form: a following value is mandatory.
		[ -n "$2" ] || _exit
		NUM=$2
		shift 2
		;;
	-*)
		# Unknown option.
		_exit
		;;
	*)
		# Positional argument: accept at most one URL.
		[ -z "$URL" ] || _exit
		URL=$1
		shift
		;;
	esac
done
# NUM, when given, must be a non-negative integer; the numeric test
# itself errors out on non-numbers, so its stderr is silenced.
if [ -n "$NUM" ]; then
	[ "$NUM" -ge 0 ] 2>/dev/null || _exit
fi
# The URL is mandatory.
[ -n "$URL" ] || _exit
# Fetch URL $1 and print every image link found in the response, one
# per line: the http(s) value of each file_url="..." XML attribute.
# Prints nothing if the fetch fails (curl -f) or no links are present.
get_links() {
	# Pipe curl straight into grep: avoids echo-ing arbitrary fetched
	# data (some /bin/sh echos interpret backslash sequences) and drops
	# the needless intermediate variable the original kept.
	curl -fsSL "$1" | grep -o 'file_url="[^"]*' | grep -o 'http[^"]*'
}
# Split the URL into path and query string, targeting the XML endpoint
# (moebooru serves the same listing as XML at "<path>.xml").
if echo "$URL" | grep -qs '?'; then
	path=${URL%%\?*}.xml
	query=${URL#*\?}
else
	path="$URL.xml"
	query=
fi
links=
if [ -z "$NUM" ]; then
	# No -n given: fetch exactly the page the user asked for.
	url="$path"
	[ -n "$query" ] && url="$url?$query"
	links=$(get_links "$url")
else
	# Paginate: strip any existing page= parameter so our own can be
	# appended.  Use -E (POSIX ERE) for the optional '&': the previous
	# GNU-only BRE '\?' is treated as a literal '?' by BSD sed, which
	# silently broke page-stripping on non-GNU systems.
	query=$(echo "$query" | sed -E 's/&?page=[0-9]*//g')
	[ -n "$query" ] && query="$query&"
	page=1
	# Keep fetching pages until NUM links are collected (NUM=0 = all).
	while [ "$NUM" -eq 0 ] || [ "$(echo "$links" | wc -w)" -lt "$NUM" ]; do
		p="page=$page"
		url="$path?$query$p"
		_links=$(get_links "$url")
		# An empty page means we ran past the last page of results.
		[ "$(echo "$_links" | wc -w)" -eq 0 ] && break
		links="$links $_links"
		: $((page = page + 1))
	done
	# Normalize to one link per line, then trim to the requested count.
	links=$(echo "$links" | xargs -n 1)
	[ "$NUM" -eq 0 ] || links=$(echo "$links" | head -n "$NUM")
fi
echo "$links"