#!/bin/sh
# Find dead (404) links in an HTML document
# Usage: deadlink.sh FILE|URL|-    ("-" reads the HTML from stdin)
# Requires curl, grep, hxwls, GNU parallel
# $Id: deadlink.sh,v 1.4 2025/02/17 10:09:32 oc45ujef Exp $
# https://wwwcip.cs.fau.de/~oc45ujef/misc/src/deadlink.sh
# links SOURCE
# Feed an HTML document to hxwls and print the links it extracts.
# SOURCE is one of:
#   -              read the document from stdin
#   existing file  read the document from that file
#   anything else  handed to curl as a URL (-L follows redirects, -s is silent)
links(){
  if [ "$1" = "-" ]
  then
    cat
  elif [ -f "$1" ]
  then
    # "--" stops option parsing so a file name starting with "-" is
    # read as a file instead of being rejected as unknown options.
    cat -- "$1"
  else
    curl -Ls "$1"
  fi | hxwls -
}
# Main pipeline: extract the links from the source named by $1, keep those
# starting with "http", probe each one with a HEAD request (curl -I) via
# GNU parallel, and print a "404<TAB>link" line for every link whose final
# HTTP status is 404.
#
# youtube.com/watch links are probed through YouTube's oEmbed endpoint
# rather than directly (presumably because the watch page itself does not
# answer 404 for missing videos -- confirm).
#
# The links come from untrusted HTML, so inside the parallel template they
# are only ever inserted as {}, which GNU parallel shell-quotes. The former
# {= uq =} pasted the raw link into a double-quoted string, where $(...) or
# backticks inside a link would have been executed as commands.
# -G --data-urlencode URL-encodes the link before appending it to the oEmbed
# query (combined with -I it is still sent as a HEAD request), so an "&" in
# the link is no longer split off as a separate oEmbed parameter.
# The link is echoed with printf instead of through curl -w so that "%" and
# "\" in it are never interpreted as format directives.
links "$1" \
  | grep -e '^http' \
  | parallel '
    case {} in
      *"youtube.com/watch"*)
        status=$(curl -ILs -o /dev/null -w "%{http_code}" -G --data-urlencode url={} "https://www.youtube.com/oembed?format=json")
        ;;
      *)
        status=$(curl -ILs -o /dev/null -w "%{http_code}" {})
        ;;
    esac
    printf "%s\t%s\n" "$status" {}' \
  | grep -e '^404'