about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
authorPatryk Niedźwiedziński <pniedzwiedzinski19@gmail.com>2021-03-15 08:51:01 +0100
committerPatryk Niedźwiedziński <pniedzwiedzinski19@gmail.com>2021-03-15 08:51:01 +0100
commitb5f5fbd118faadd6a9f9b68ab00371b434c08e0e (patch)
treee11150f2810a3ef3372e16abeb7c865054106c01 /tools
parent4283da83bd80d3f256106cc5d8798b4c4e8b388d (diff)
downloadkronika-b5f5fbd118faadd6a9f9b68ab00371b434c08e0e.tar.gz
kronika-b5f5fbd118faadd6a9f9b68ab00371b434c08e0e.zip
Add rssg
Diffstat (limited to 'tools')
-rwxr-xr-xtools/rssg189
1 files changed, 189 insertions, 0 deletions
diff --git a/tools/rssg b/tools/rssg
new file mode 100755
index 0000000..52031e7
--- /dev/null
+++ b/tools/rssg
@@ -0,0 +1,189 @@
+#!/bin/sh
+#
+# https://www.romanzolotarev.com/bin/rssg
+# Copyright 2018 Roman Zolotarev <hi@romanzolotarev.com>
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+set -e
+
+
+main () {
+	test -n "$1" || usage
+	test -n "$2" || usage
+	test -f "$1" || no_file "$1"
+
+
+	index_file=$(readlink -f "$1")
+	test -z "${index_file##*html}" && html=$(cat "$index_file")
+	test -z "${index_file##*md}" && html=$(md_to_html "$index_file")
+	test -n "$html" || usage
+
+	base="${index_file%/*}"
+	base_url="$(echo "$html" | get_url | sed 's#\(.*\)/[^/]*#\1#')"
+
+	url=$(		echo "$html" | get_url)
+
+	title="$2"
+
+	description=$(	echo "$html" | get_description |
+			remove_tags |
+			remove_nbsp )
+
+	items=$(	echo "$html" | get_items)
+
+	rss=$(		echo "$items" |
+			render_items "$base" "$base_url" |
+			render_feed "$url" "$title" "$description")
+
+	>&2 echo "[rssg] ${index_file##$(pwd)/} $(echo "$rss" | grep -c '<item>') items"
+	echo "$rss"
+}
+
+
+usage() {
+	echo "usage: ${0##*/} index.{html,md} title > rss.xml" >&2
+	exit 1
+}
+
+
+no_file() {
+	echo "${0##*/}: $1: No such file" >&2
+	exit 2
+}
+
+
+md_to_html() {
+	test -x "$(which lowdown)" || exit 3
+	lowdown \
+	-D html-skiphtml \
+	-D smarty \
+	-d metadata \
+	-d autolink "$1"
+}
+
+
+get_title() {
+	awk 'tolower($0)~/^<h1/{gsub(/<[^>]*>/,"",$0);print;exit}'
+}
+
+
+get_url() {
+	grep -i '<a .*rss.xml"' | head -1 |
+	sed 's#.*href="\(.*\)".*#\1#'
+}
+
+
+get_items() {
+	grep -i 'href=".*" title="' |
+	sed 's#.*href="\(.*\)" title="\(.*\)">\(.*\)</a>.*#\1 \2 \3#'
+}
+
+
+get_description() {
+	start='sub("^.*<"s"*"t"("s"[^>]*)?>","")'
+	stop='sub("</"s"*"t""s"*>.*","")&&x=1'
+	awk -v 's=[[:space:]]' -v 't=[Pp]' "$start,$stop;x{exit}"
+}
+
+remove_tags() {
+	sed 's#<[^>]*>##g;s#</[^>]*>##g'
+}
+
+
+remove_nbsp() {
+	sed 's#\&nbsp;# #g'
+}
+
+
+rel_to_abs_urls() {
+	site_url="$1"
+	base_url="$2"
+
+	abs='s#(src|href)="/([^"]*)"#\1="'"$site_url"/'\2"#g'
+	rel='s#(src|href)="([^:/"]*)"#\1="'"$base_url"/'\2"#g'
+	sed -E "$abs;$rel"
+}
+
+
+date_rfc_822() {
+	date -j '+%a, %d %b %Y %H:%M:%S %z' \
+	"$(echo "$1"| tr -cd '[:digit:]')0000"
+}
+
+
+render_items() {
+	while read -r i
+	do render_item "$1" "$2" "$i"
+	done
+}
+
+
+render_item() {
+	base="$1"
+	base_url="$2"
+	item="$3"
+
+	site_url="$(echo "$base_url"| sed 's#\(.*//.*\)/.*#\1#')"
+
+	date=$(echo "$item"|awk '{print$2}')
+	url=$(echo "$item"|awk '{print$1}')
+
+	f="$base/$url"
+	test -f "$f" && html=$(cat "$f")
+	test -f "${f%\.html}.md" && html=$(md_to_html "${f%\.html}.md")
+
+	description=$(
+		echo "$html" |
+		rel_to_abs_urls "$site_url" "$base_url" |
+		remove_nbsp
+	)
+	title=$(echo "$description" | get_title)
+	guid="$base_url/$(echo "$url" | sed 's#^/##')"
+
+	echo '
+<item>
+<guid>'"$guid"'</guid>
+<link>'"$guid"'</link>
+<pubDate>'"$(date_rfc_822 "$date")"'</pubDate>
+<title>'"$title"'</title>
+<description><![CDATA[
+
+'"$description"'
+
+]]></description>
+</item>'
+}
+
+
+render_feed() {
+	url="$1"
+	title=$(echo "$2" | remove_nbsp)
+	description="$3"
+
+	base_url="$(echo "$url" | cut -d '/' -f1-3)"
+
+	echo '<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
+<channel>
+<atom:link href="'"$url"'" rel="self" type="application/rss+xml" />
+<title>'"$title"'</title>
+<description>'"$description"'</description>
+<link>'"$base_url"'/</link>
+<lastBuildDate>'"$(date_rfc_822 date)"'</lastBuildDate>
+'"$(cat)"'
+</channel></rss>'
+}
+
+
+main "$@"