diff options
Diffstat (limited to 'lib/puke')
-rw-r--r-- | lib/puke/core.rb | 190 | ||||
-rw-r--r-- | lib/puke/server.rb | 125 |
2 files changed, 315 insertions, 0 deletions
# ---- lib/puke/core.rb (new file, mode 100644, index 0000000..c37b17a) ----
require "gdbm"
require "fileutils"
require "tempfile"
require "time"

# Puke stores messages as mbox files under a data directory and indexes them
# in a GDBM database.
#
# Database layout, as written by Puke.create (fields are NUL-separated):
#   "last-tid" => largest thread id allocated so far
#   "t<tid>"   => Message-IDs of the messages in thread <tid>
#   "m<mid>"   => thread id, subject and RFC 2822 date of message <mid>
module Puke
  class << self
    attr_reader :datadir, :listen_address, :listen_port, :secret

    def verbose?
      @verbose
    end
  end

  module_function

  # Configures the module and opens (creating if needed) the database.
  #
  # verbose: - value reported by Puke.verbose?
  # datadir: - directory holding the .mbox files, "tmp/" and "puke.gdbm"
  # listen:  - "address:port" string; the port must be an integer
  # secret:  - path component that authorizes posting (see Puke::Server#new)
  def setup(verbose: false, datadir:, listen:, secret: nil)
    @verbose = verbose
    @datadir = datadir
    host, port = listen.split(":")
    @listen_address = host
    @listen_port = Integer(port)
    @secret = secret

    FileUtils.mkdir_p(@datadir)
    FileUtils.mkdir_p(File.join(@datadir, "tmp"))
    gpath = File.join(@datadir, "puke.gdbm")
    @gdbm = GDBM.new(gpath, 0644, GDBM::WRCREAT)
  end

  # Closes the database; the handle is replaced by the marker :closed.
  def close
    @gdbm.close
    @gdbm = :closed
  end

  # Directory in which incoming messages are staged before being linked
  # into the data directory.
  def tmpdir
    File.join(datadir, "tmp")
  end

  # Returns the largest tid ever, or -1 if no messages are stored
  def last_tid
    @gdbm.fetch("last-tid", "-1").to_i
  end

  # Returns the Message-IDs stored for thread +tid+.
  def thread(tid)
    @gdbm.fetch("t#{tid}").split("\0")
  end

  # Returns the thread id (as stored, i.e. a String) of message +mid+, or nil
  # when the message is not indexed.
  def mid?(mid)
    @gdbm["m#{mid}"]&.split("\0")&.first
  end

  # Returns a Hash with :mid, :tid (Integer), :subject and :date (Time) for
  # an indexed message. The message must exist; check with mid? first.
  def metadata(mid)
    tid, subject, date = @gdbm["m#{mid}"].split("\0")
    { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) }
  end

  # Returns the stored mbox text of message +mid+.
  def read(mid)
    File.read(File.join(datadir, mangle_mid(mid) + ".mbox"))
  end

  CreateMutex = Thread::Mutex.new

  # Stores the message read from +io+ and returns its Message-ID (without the
  # surrounding angle brackets).
  #
  # When +is_raw+ is true the input is treated as a bare message: a "From "
  # separator line is prepended, body lines starting with "From " are
  # quoted, and CRLF line endings become LF. Otherwise the input is copied
  # unchanged.
  #
  # Raises RuntimeError when the headers are unusable (see parse_file), when
  # the Message-ID is already stored, or when the target file already exists.
  def create(io, is_raw = false)
    t = Tempfile.open("unnamed", tmpdir)
    if is_raw
      # Is this really correct??
      t << "From mboxrd Thu Jan 1 00:00:00 1970\n"
      io.each_line do |line|
        # Both patterns are anchored, so each can match at most once.
        t << line.sub(/\A(>*)From /, "\\1>From ").sub(/\r\n\z/, "\n")
      end
      t << "\n"
    else
      IO.copy_stream(io, t)
    end
    t.rewind
    mid, tid, subject, date = parse_file(t)
    fpath = File.join(datadir, mangle_mid(mid) + ".mbox")

    # A mutex is used here in case two threads try to store the same message
    CreateMutex.synchronize do
      if Puke.mid?(mid)
        existing = Puke.metadata(mid)
        if (!tid || tid == existing[:tid]) &&
           subject == existing[:subject] && date == existing[:date]
          raise "duplicate message"
        end

        # TODO: Oops, how to deal with this case?
        raise "duplicate Message-ID; subject=#{existing[:subject]} and " \
              "subject=#{subject}"
      elsif File.exist?(fpath)
        # Distinct Message-IDs can map to the same file name because of the
        # mangling; there is no index entry to compare against in that case.
        raise "Message-ID collides with existing file #{fpath}"
      end

      if tid
        # TODO: Hmph
        members = Puke.thread(tid).map { |m| Puke.metadata(m) }
        members << { mid: mid, date: date, subject: subject }
        members.sort_by! { |i| [i[:date], i[:subject]] }
        @gdbm["t#{tid}"] = members.map { |i| i[:mid] }.join("\0")
      else
        # No known parent: start a new thread.
        tid = last_tid + 1
        @gdbm["last-tid"] = tid.to_s
        @gdbm["t#{tid}"] = mid
      end
      @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0")
      # Hard-link the staged file into place; the temporary name is removed
      # by close! below.
      File.link(t.path, fpath)
    end
    mid
  ensure
    t&.close!
  end

  # Reads the header section of an mboxrd message from +io+.
  #
  # Returns [mid, tid, subject, date] where +mid+ has its angle brackets
  # removed, +tid+ is the Integer thread id of the first already-stored
  # parent (In-Reply-To first, then References) or nil, and +date+ is a Time.
  #
  # Raises RuntimeError when Message-ID, Subject or Date is missing or the
  # Message-ID is malformed; Time.rfc2822 raises ArgumentError for a bad date.
  def parse_file(io)
    # We can assume the input is in mboxrd format
    mid = nil
    parents = []
    subject = nil
    date = nil

    state = nil
    io.each_line do |raw|
      # We use \0 as a delimiter so don't allow that
      line = raw.delete("\0")

      # End of headers
      break if line == "\n"

      if line.start_with?(" ", "\t")
        # Folded continuation of the previous header. A continuation with
        # no preceding header is invalid and is ignored.
        case state
        when :subject
          subject << line.chomp
        when :references
          parents.concat(line.scan(/<[^<>]+>/))
        end
        next
      end

      name, value = line.split(":", 2)
      next unless value # Hmm, invalid header?

      value = value.strip
      # Reset first so continuation lines of a header we do not collect
      # (including a repeated Subject) are not attributed to an earlier one.
      state = nil

      case name.downcase
      when "date"
        date = value
      when "message-id"
        mid ||= value
      when "in-reply-to"
        parents.unshift(value)
      when "references"
        parents.concat(value.scan(/<[^<>]+>/))
        state = :references
      when "subject"
        unless subject
          subject = value
          state = :subject
        end
      end
    end

    mid or raise "Message-ID not found"
    mid = unwrap_mid(mid) or raise "Message-ID is invalid"
    subject or raise "Subject not found"
    date or raise "Date not found"
    date = Time.rfc2822(date)

    # TODO: Implement saner threading algorithm
    tid = nil
    parents.each do |pmid|
      pmid = unwrap_mid(pmid) or next
      item = @gdbm["m#{pmid}"] or next
      # Converted to Integer so it compares equal to metadata(...)[:tid].
      tid = item.split("\0").first.to_i
      break
    end

    [mid, tid, subject, date]
  end

  # Strips < and > from an ASCII-only "<local@domain>" Message-ID; returns
  # nil for anything else.
  def unwrap_mid(bracketed)
    m = /\A<(?<inner>.+@.+)>\z/.match(bracketed)
    m[:inner] if m && bracketed.ascii_only?
  end

  # Maps a Message-ID to a string usable as a file name ("/" becomes "_").
  def mangle_mid(mid)
    mid.tr("/", "_")
  end
end

# ---- lib/puke/server.rb (new file, mode 100644, index 0000000..29d4479) ----
require_relative "core"
require "stringio"
require "webrick"
require "erb"

# WEBrick front end: "/" lists threads, "/mid/..." serves stored messages and
# "/new/<secret>" accepts new ones.
class Puke::Server
  include ERB::Util

  THREADS_PER_PAGE = 20

  ROOT_TEMPLATE = <<'EOF'
<html>
<head>
<title>random texts (page: <%=page%>)</title>
<style>
pre { white-space: pre-wrap; }
</style>
</head>
<body><h1>random texts (page: <%=page%>)</h1>
<hr>
<%threads.each do |ms| root, *child = ms%><pre>
--<%=h fdate root[:date]%> <a href="/mid/<%=h root[:mid]%>"><%=h root[:subject]%></a> <a href="/mid/<%=h root[:mid]%>/thread.mbox">[thread.mbox]</a><%child.each do |m|%>
 `<%=h fdate m[:date]%> <a href="/mid/<%=h m[:mid]%>"><%=h m[:subject]%></a><%end%></pre>
<%end%>
<hr>
page: <%if page > 1%><a href="/?page=<%=page - 1%>"><< prev</a><%end%>
<%if threads.last&.dig(0, :tid)&.> 0%><a href="/?page=<%=page + 1%>">next >></a><%end%>
</body>
</html>
EOF

  # Builds the HTTP server from the settings made by Puke.setup and opens
  # "access.log" in the data directory for appending.
  def initialize
    @access_log = File.open(File.join(Puke.datadir, "access.log"), "a")
    @access_log.sync = true
    al = [[@access_log, WEBrick::AccessLog::COMMON_LOG_FORMAT]]
    @server = WEBrick::HTTPServer.new(:BindAddress => Puke.listen_address,
                                      :Port => Puke.listen_port,
                                      :AccessLog => al)
    @server.mount_proc("/", method(:root))
    @server.mount_proc("/mid", method(:mid))
    @server.mount_proc("/new", method(:new))

    @root_template = ERB.new(ROOT_TEMPLATE)
  end

  # Handles "/" (paginated thread index, newest thread first) and
  # "/robots.txt"; every other path is a 404.
  def root(req, res)
    case req.path_info
    when "/"
      # Pages are 1-based; anything smaller is treated as the first page.
      # `page` and `threads` are read by the template through `binding`.
      page = [req.query.fetch("page", "1").to_i, 1].max
      threads = threads_for_page(page)

      res.content_type = "text/html; charset=UTF-8"
      res.body = @root_template.result(binding)
    when "/robots.txt"
      # I don't want search engines to index puke
      res.content_type = "text/plain"
      res.body = <<~ROBOTS
        User-agent: *
        Disallow: /
      ROBOTS
    else
      res.status = 404
    end
  end

  # Handles "/mid/<message-id>", "/mid/<message-id>/raw.mbox" (one message)
  # and "/mid/<message-id>/thread.mbox" (every message of its thread).
  # Unknown messages are a 404, unknown ".mbox" names a 400.
  def mid(req, res)
    m = %r{\A/(?<mid>.+@.+?)(?:/(?<unya>\w+)\.mbox)?\z}.match(req.path_info)
    tid = m && Puke.mid?(m[:mid])
    unless tid
      res.status = 404
      return
    end

    case m[:unya]
    when "thread"
      # TODO: Concatenated-IO object?(?)
      res.body = Puke.thread(tid).map { |imid| Puke.read(imid) }.join
    when nil, "raw"
      res.body = Puke.read(m[:mid])
    else
      res.status = 400
      return
    end
    res.content_type = "text/plain"
  end

  # Handles "/new/<secret>": stores the request body as a new message and
  # answers with its Message-ID. Without a configured secret, or with the
  # wrong one, the answer is a 404.
  #
  # The "format=raw" query parameter forces raw handling; without a "format"
  # parameter the body is treated as raw unless it starts with "From ".
  #
  # FIXME: How about large messages?
  def new(req, res)
    unless Puke.secret && req.path_info == "/#{Puke.secret}"
      res.status = 404
      return
    end

    # Parse query string ourselves because req.query does not work for POST
    # requests as for GET requests.
    query = WEBrick::HTTPUtils.parse_query(req.query_string)
    # A request without an entity has a nil body; treat it as empty so the
    # failure comes from header validation in Puke.create.
    body = req.body.to_s
    format = query["format"]
    is_raw = format ? format == "raw" : !body.start_with?("From ")

    mids = Puke.create(StringIO.new(body), is_raw)
    res.content_type = "text/plain"
    res.body = "#{mids}\n"
  end

  # Starts serving requests.
  def start
    @server.start
  end

  # Stops the server and closes the access log.
  def shutdown
    @server.shutdown
    @access_log.close
  end

  private

  # Returns the metadata of the threads shown on 1-based +page+, newest
  # thread first; each thread is an Array of Puke.metadata hashes. Pages past
  # the oldest thread (or an empty store) yield [].
  def threads_for_page(page)
    newest = Puke.last_tid - (page - 1) * THREADS_PER_PAGE
    return [] if newest.negative?

    oldest = [newest - THREADS_PER_PAGE + 1, 0].max
    newest.downto(oldest).map do |tid|
      Puke.thread(tid).map { |mid| Puke.metadata(mid) }
    end
  end

  # Formats +time+ for the index page.
  def fdate(time)
    time.strftime("%Y-%m-%d %H:%M:%S %:z")
  end
end