#!/bin/sh
# Find dead (404) links in an html file
# Requires curl, grep, hxwls, parallel
# $Id: deadlink.sh,v 1.4 2025/02/17 10:09:32 oc45ujef Exp $
# https://wwwcip.cs.fau.de/~oc45ujef/misc/src/deadlink.sh
#
# Usage: deadlink.sh FILE|URL|-
#   FILE  existing regular file containing HTML
#   URL   anything else; fetched with curl (redirects followed)
#   -     read the HTML from standard input
#
# Output: one "404<TAB>url" line per link whose HEAD request returned 404.
# Exit status: that of the final grep (0 if at least one dead link was
# printed, 1 if none), or 2 when called without an argument.

if [ "$#" -lt 1 ]; then
	printf 'usage: %s FILE|URL|-\n' "${0##*/}" >&2
	exit 2
fi

# links SOURCE
# Write the HTML named by SOURCE ("-" = stdin, an existing regular file,
# otherwise a URL for curl) into hxwls, which lists the links it contains.
links() {
	if [ "$1" = "-" ]; then
		cat
	elif [ -f "$1" ]; then
		# "--" keeps a file name starting with "-" from being read as an option.
		cat -- "$1"
	else
		curl -Ls "$1"
	fi | hxwls -
}

# Only absolute http(s) links are checked. parallel runs the quoted snippet
# once per link and reports "<status>\t<link>"; the last grep keeps the 404s.
#
# Security: the links come from arbitrary HTML, so the snippet binds the
# link to a shell variable through parallel's shell-quoted replacement
# string and never pastes the raw text into the command line or into
# curl's -w format (where "%" sequences would be interpreted).
#
# YouTube watch pages are probed through the oembed endpoint instead of
# being requested directly; the line printed still shows the original link.
links "$1" \
	| grep -e '^http' \
	| parallel '
url={}
case "$url" in
*"youtube.com/watch"*)
	target="https://www.youtube.com/oembed?format=json&url=$url"
	;;
*)
	target=$url
	;;
esac
code=$(curl -ILs -o /dev/null -w "%{http_code}" "$target")
printf "%s\t%s\n" "$code" "$url"
' \
	| grep -e '^404'