commit 345d3bfe275cec50647214312e9efa65db7b3fe8
parent 7ef4aa35636e8de351e0b0c5018d8fea6455a850
Author: Anders Damsgaard <anders@adamsgaard.dk>
Date: Thu, 11 Apr 2019 09:43:28 +0200
Allow getdoi to read DOI from pdfs
Diffstat:
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/bin/getdoi b/bin/getdoi
@@ -6,7 +6,9 @@ host="http://api.crossref.org/works"
function show_help {
echo "usage: ${0##*/} [OPTIONS] QUERY"
echo "will attempt to get the DOI from $host"
- echo "where QUERY can consist of publication title, author, DOI, ORCID id."
+ echo "where QUERY can consist of publication title, author, DOI, ORCID id,"
+ echo "or a PDF file. In case a file is specified, ${0##*/} will attempt to"
+ echo "extract the DOI from it."
echo "If no QUERY is specified, this program will expect a QUERY as stdin."
echo
echo "OPTIONS are one or more of the following:"
@@ -35,7 +37,7 @@ function extract_dois {
sed 's/.*DOI":"//' | sed 's/"}.*//' | sed 's|\\\/|/|g'
}
-function get_doi {
+function get_doi_from_crossref {
query="$(echo "$@" | sed 's/ /+/g')"
url="$host?rows=$number&select=DOI&query=$query"
[ "$verbose" = 1 ] && echo "connecting to $url"
@@ -46,6 +48,22 @@ function get_doi {
echo "$result" | extract_dois
}
+# Print the DOI found in the PDF file given as $1.
+#
+# The output of pdfinfo is searched first for a case-insensitive "doi"
+# string. If that pipeline fails (e.g. grep finds no match), the document
+# text from pdftotext is searched instead and only the first match is kept.
+# If both attempts fail, die is called with a message naming the file.
+#
+# Arguments: $1 - path to the PDF file
+# Outputs:   the extracted DOI on stdout
+# Globals:   doi (written; not declared local, like the variables of
+#            get_doi_from_crossref)
+function get_doi_from_file {
+    # $doi is empty when both lookups have failed, so the error message
+    # must report the file name ($1) rather than $doi.
+    doi=$(pdfinfo "$1" | grep -io "doi.*") ||
+        doi=$(pdftotext "$1" 2>/dev/null - | grep -io "doi.*" -m 1 ) ||
+        die "Error: Could not extract DOI from file $1"
+    # Strip the leading run of letters, dots, slashes and colons (the "doi:"
+    # or "doi.org/" style prefix), then one trailing period or comma.
+    doi=$(echo "$doi" | sed 's/[A-Za-z\.\/:]*//;s/[\.,]$//')
+    echo "$doi"
+}
+
+# Resolve a DOI for the given arguments.
+#
+# When the first argument names an existing path, the DOI is read from that
+# file via get_doi_from_file; otherwise all arguments are passed on to
+# get_doi_from_crossref as a search query. The exit status is that of
+# whichever helper was run.
+function get_doi {
+    # Not a path on disk: treat the arguments as a query.
+    if [ ! -e "$1" ]; then
+        get_doi_from_crossref "$@"
+        return
+    fi
+    get_doi_from_file "$1"
+}
+
verbose=0
number=1
prefix=""