commit fecff68b69260c611c7d4a498ed02b32b6c7a2d2
parent 6de69db6f6e87763f9f324b277b6ad2b09f614b3
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Wed, 27 Jan 2021 11:36:43 +0100
extract_urls: allow more than 1 url per line
Diffstat:
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/.local/bin/extract_urls b/.local/bin/extract_urls
@@ -1,9 +1,13 @@
#!/usr/bin/awk -f
{
- if (match($0, /(http|https|ftp|gopher):\/\/[A-z0-9\-\/\.\?=%]+[^ .,:\t\n\r<">\)]/)) {
- url = substr($0, RSTART, RLENGTH)
- if (!urls[url]++)
- print url
+ if ((n = split($0, words, " ")) > 0) {  # n = number of blank-separated words on this line
+     for (i = 1; i <= n; i++) {  # index loop keeps input order; for-in order is unspecified in awk
+         if (match(words[i], /(http|https|ftp|gopher):\/\/[A-z0-9\-\/\.\?=%:]+[^'.:\r\?]/)) {
+             url = substr(words[i], RSTART, RLENGTH)  # first URL-looking span inside this word
+             if (!urls[url]++)  # urls[] counts sightings, so each distinct URL prints once
+                 print url
+         }
+     }
}
}