I've gone through old posts and rehosted images wherever I could. Some images were already gone, so they've been left alone.
Because you might find it interesting, here's the process I followed and my code:
At first I was downloading images manually and uploading them as attachments; as a side effect I've tweaked the embedded attachment styling a bit, but it's still not ideal.
That was also a lot of manual labor, so I started thinking about a better way. I didn't feel like learning how to interface with phpBB's attachments programmatically, so I decided to just upload things to a static folder on the website.
I made a userscript to harvest the imgur links inside a post and then give me a command I could run on the server to quickly download them.
Userscript
Code:
// ==UserScript==
// @match https://love2d.org/forums/posting.php*
// @grant GM_setClipboard
// ==/UserScript==
(function() {
    'use strict';
    let imgur_re = /(https?:\/\/)?i\.imgur\.com\/([a-zA-Z0-9\.]*)/
    let button = document.createElement("button")
    button.className = "button button-secondary"
    button.innerText = "rehost"
    button.addEventListener("click", e => {
        e.preventDefault()
        let message = document.querySelector("#message")
        let files = []
        let match
        // the regex isn't global, but each match gets replaced below,
        // so exec() keeps finding the next remaining imgur link until none are left
        while ((match = imgur_re.exec(message.value)) !== null) {
            let filename = match[2]
            message.value = message.value.replace(match[0], `https://love2d.org/imgmirrur/${filename}`)
            files.push(filename)
        }
        let cmd = "./download.sh " + files.join(" ")
        console.log("copied: ", cmd)
        GM_setClipboard(cmd)
    })
    document.querySelector("#format-buttons").appendChild(button)
})();
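Clicking the button rewrites the links in the post body and copies the download command to the clipboard; it ends up looking something like this (filenames made up):
Code:
./download.sh abc123.png def456.gif q1w2e3.jpg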
download.sh
Code:
#!/bin/bash
for fn in "$@"; do
    if [ -f "$fn" ]; then
        echo "skipping already downloaded $fn"
    else
        echo "downloading $fn"
        wget "https://i.imgur.com/$fn"
    fi
done
I was rehosting images in batches of whenever-I-felt-like-it. However, a few days ago I ran into an issue: all requests to imgur started returning HTTP 429 (Too Many Requests), which imgur uses to signify rate limiting.
I sent them a support message asking to be unblocked, but it was ignored. So I had to overengineer a solution: download the images through a VPN, then upload them to the server.
download.erl
Code:
-module(download).
-compile(export_all).

prepare() -> io:format("call download:prepare({127,0,0,1}) where the IP is the VPN interface~n").
prepare(IP) ->
    inets:start(),
    ssl:start(),
    ok = httpc:set_options([{ip, IP}]), % make all outgoing httpc requests go through this interface
    {ok, Response} = httpc:request("https://ipof.me/"),
    {{_,200,_}, _, Body} = Response,
    string:find(Body, "(vpn provider)") =/= nomatch. % make sure it's working

go(Files) ->
    UniqueFiles = lists:usort(Files),
    {DownloadedAlready, NotDownloadedAlready} = lists:partition(fun (Fn) -> filelib:is_file(Fn) end, UniqueFiles),
    [ io:format("skipping already downloaded ~s~n", [Fn]) || Fn <- DownloadedAlready ],
    Responses = [ {Fn, download(Fn)} || Fn <- NotDownloadedAlready ],
    {Successes, Failures} = lists:partition(fun ({_, {{_, Code, _}, _, _}}) -> Code =:= 200 end, Responses),
    [ io:format("failed to download ~s (code ~p)~n", [Fn, Code]) || {Fn, {{_, Code, _}, _, _}} <- Failures ],
    [ file:write_file(Fn, Body) || {Fn, {_, _, Body}} <- Successes ],
    SuccessFiles = [ Fn || {Fn, _} <- Successes ],
    case length(SuccessFiles) of
        0 -> ok;
        _ -> upload_files(SuccessFiles)
    end,
    io:format("uploaded ~p files~n", [length(SuccessFiles)]).

download(Fn) ->
    {ok, Res} = httpc:request("https://i.imgur.com/" ++ Fn),
    io:format("downloaded ~s~n", [Fn]),
    Res.

upload_files(Files) ->
    Cmd = io_lib:format("scp ~s love:love2d.org/imgmirrur/", [lists:join(" ", Files)]),
    os:cmd(Cmd),
    ok.
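For reference, a session in the Erlang shell looks roughly like this (the interface address and filenames are made up):
Code:
1> c(download).
{ok,download}
2> download:prepare({10,8,0,2}).
true
3> download:go(["abc123.png", "def456.gif"]).
prepare/1 only returns true if ipof.me reports the request as coming from the VPN provider.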
A quick change to the userscript was also required to adapt to the new format (download:go([...]).)
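Concretely, only the line that builds the command needs to change; something along these lines:
Code:
let cmd = 'download:go([' + files.map(f => `"${f}"`).join(", ") + ']).'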
Today I finished rehosting everything. Albums required a little bit more manual work. First I downloaded them using
https://github.com/mikf/gallery-dl with the --with-metadata argument.
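The invocation was along these lines (album URL made up):
Code:
gallery-dl --with-metadata https://imgur.com/a/AbCdEfG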
I then cooked up a script to generate an album page from the downloaded folder.
gallery_page.js
Code:
const fs = require("fs")
const path = require("path")
const { exec } = require("child_process")

if (process.argv.length < 3) {
    console.log("gallery_page.js folder")
    process.exitCode = 1
    return
}

const imgname_re = /_([^_]+)$/
const folder = process.argv[2]
let title
let album_id
let images = []

const files = fs.readdirSync(folder)
for (const file of files) {
    const fullpath = path.join(folder, file)
    if (file.endsWith(".json")) { // metadata
        const info = JSON.parse(fs.readFileSync(fullpath))
        if (album_id === undefined) {
            album_id = info.album.id
        }
        if (title === undefined && info.title !== "") {
            title = info.title
        }
        const image = {}
        image.filename = `${info.filename}.${info.extension}`
        if (info.description !== "") {
            image.description = info.description
        }
        images.push(image)
    } else { // image: copy into the current directory, keeping only the part after the last "_"
        fs.copyFileSync(fullpath, imgname_re.exec(file)[1])
    }
}

let album_page = `
<html>
<head>
<title>${title === undefined ? "untitled album" : title}</title>
<link rel="stylesheet" href="album.css" />
</head>
<body>
`
if (title !== undefined) {
    album_page += `<h1>${title}</h1>`
}
let i = 0
for (const image of images) {
    album_page += `<div id="${i}">`
    if (image.filename.endsWith("mp4")) {
        album_page += `<video src="${image.filename}" autoplay></video>`
    } else {
        album_page += `<a href="${image.filename}"><img src="${image.filename}" /></a>`
    }
    if (image.description !== undefined) {
        album_page += `<p>${image.description}</p>`
    }
    album_page += `</div>`
    i += 1
}
album_page += `
</body>
</html>`

fs.writeFileSync(`${album_id}.html`, album_page)
exec(`scp ${album_id}.html ${images.map(f => f.filename).join(" ")} love:love2d.org/imgmirrur/`, (_, stdout, __) => console.log(stdout))
console.log(`https://love2d.org/imgmirrur/${album_id}.html`)
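The script gets run once per downloaded album folder, e.g. (folder path made up):
Code:
node gallery_page.js gallery-dl/imgur/AbCdEfG
It writes the album page next to the copied images in the current directory, scps everything over, and prints the final URL.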
Here are some numbers:
- 941 images downloaded and attached (phase 1)
- 1565 images directly rehosted (phase 2)
- 1133 images rehosted through VPN (phase 3)
- 1.2 GB of data in total
From now on, I would recommend using attachments. I'm aware of some issues with them (for example, you can't put an attachment image inside [url] tags), but fixing that would probably require an uncomfortable amount of phpBB hacking.
Uploading a webm is allowed if you want to show off a video or gif, but unfortunately it won't embed inline. I may look into that.