From 4d86351b6799db4338561ec84c5531ba14ce64b2 Mon Sep 17 00:00:00 2001 From: Kazuki Yamaguchi Date: Wed, 7 Jun 2017 21:22:41 +0900 Subject: puke --- .gitattributes | 2 + .gitignore | 1 + LICENSE | 18 +++++ README.md | 26 ++++++++ lib/puke.rb | 2 + lib/puke/core.rb | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/puke/server.rb | 125 +++++++++++++++++++++++++++++++++++ puke | 30 +++++++++ puke-import | 28 ++++++++ 9 files changed, 422 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 lib/puke.rb create mode 100644 lib/puke/core.rb create mode 100644 lib/puke/server.rb create mode 100755 puke create mode 100755 puke-import diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b74031e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.rb diff=ruby +/puke* diff=ruby diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ceeb05b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/tmp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..17cceaf --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2017 Kazuki Yamaguchi + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5eb671e --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +puke: random text board +======================= + +puke is a textboard system specialized for posting patches. + +A running example is here: https://s.poepoe.org + +Usage +----- + +Ruby 2.4 or later is required. No other third-party libraries are required. + +Set up the web server: + + $ ./puke -d DATADIR -l ADDRESS:PORT -s SECRET + +and post a message with your favorite HTTP client: + + $ cat random.mboxrd | curl --data-binary @- -H 'Content-Type: text/plain' http://ADDRESS:PORT/new/SECRET + + +License +------- + +puke is available as free software under the terms of the MIT License. +See the file LICENSE for details. 
diff --git a/lib/puke.rb b/lib/puke.rb new file mode 100644 index 0000000..a77582b --- /dev/null +++ b/lib/puke.rb @@ -0,0 +1,2 @@ +require_relative "puke/core" +require_relative "puke/server" diff --git a/lib/puke/core.rb b/lib/puke/core.rb new file mode 100644 index 0000000..c37b17a --- /dev/null +++ b/lib/puke/core.rb @@ -0,0 +1,190 @@ +require "gdbm" +require "fileutils" +require "tempfile" +require "time" + +module Puke + class << self + attr_reader :datadir, :listen_address, :listen_port, :secret + def verbose?() @verbose end + end + + module_function + + def setup(verbose: false, datadir:, listen:, secret: nil) + @verbose = verbose + @datadir = datadir + l = listen.split(":") + @listen_address = l[0] + @listen_port = Integer(l[1]) + @secret = secret + + FileUtils.mkdir_p(@datadir) + FileUtils.mkdir_p(File.join(@datadir, "tmp")) + gpath = File.join(@datadir, "puke.gdbm") + @gdbm = GDBM.new(gpath, 0644, GDBM::WRCREAT) + end + + def close + @gdbm.close + @gdbm = :closed + end + + def tmpdir + File.join(datadir, "tmp") + end + + # Returns the largest tid ever, or -1 if no messages are stored + def last_tid + @gdbm.fetch("last-tid", "-1").to_i + end + + def thread(tid) + @gdbm.fetch("t#{tid}").split("\0") + end + + def mid?(mid) + @gdbm["m#{mid}"]&.split("\0")&.first + end + + def metadata(mid) + tid, subject, date = @gdbm["m#{mid}"].split("\0") + { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) } + end + + def read(mid) + File.read(File.join(datadir, mangle_mid(mid) + ".mbox")) + end + + CreateMutex = Thread::Mutex.new + def create(io, is_raw = false) + t = Tempfile.open("unnamed", tmpdir) + if is_raw + # Is this really correct?? 
+ t << "From mboxrd Thu Jan 1 00:00:00 1970\n" + io.each_line do |line| + t << line.gsub(/\A(>*)From /, "\\1>From ").gsub(/\r\n\z/, "\n") + end + t << "\n" + else + IO.copy_stream(io, t) + end + t.rewind + mid, tid, subject, date = parse_file(t) + fpath = File.join(datadir, mangle_mid(mid) + ".mbox") + + # A mutex is used here in case two threads try to store the same message + CreateMutex.synchronize { + # File.exist?(fpath) is needed here because of the mangling... + if Puke.mid?(mid) or File.exist?(fpath) + existing = Puke.metadata(mid) + if (!tid || tid == existing[:tid]) && subject == existing[:subject] && + date == existing[:date] + raise "duplicate message" + else + # TODO: Oops, how to deal with this case? + raise "duplicate Message-ID; subject=#{existing[:subject]} and " \ + "subject=#{subject}" + end + end + + unless tid + tid = last_tid + 1 + @gdbm["last-tid"] = tid.to_s + @gdbm["t#{tid}"] = mid + else + # TODO: Hmph + l = Puke.thread(tid).map { |m| Puke.metadata(m) } << { + mid: mid, date: date, subject: subject } + l.sort_by! { |i| [i[:date], i[:subject]] } + @gdbm["t#{tid}"] = l.map { |i| i[:mid] }.join("\0") + end + @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0") + File.link(t, fpath) + } + mid + ensure + t and t.close! + end + + def parse_file(io) + # We can assume the input is in mboxrd format + mid = nil + parents = [] + subject = nil + date = nil + + state = nil + io.each_line do |line| + # We use \0 as a delimiter so don't allow that + line.delete("\0") + + if line == "\n" + # End of headers + break + elsif line.start_with?(" ") || line.start_with?("\t") + case state + when nil + # This is invalid and must not happen... + when :subject + subject << line.chomp + when :references + parents.concat(line.scan(/(<[^<>]+>)/).flatten) + end + else + h, c = line.split(":", 2) + h.downcase! + c or next # Hmm, invalid header? + c.strip! 
+ + case h + when "date" + date = c + state = nil + when "message-id" + next if mid + mid = c + state = nil + when "in-reply-to" + parents.unshift(c) + state = nil + when "references" + parents.concat(c.scan(/(<[^<>]+>)/).flatten) + state = :references + when "subject" + next if subject + subject = c + state = :subject + else + state = nil + end + end + end + + mid or raise "Message-ID not found" + # Strip < and > + /\A<(?.+@.+)>\z/ =~ mid and mid.ascii_only? or + raise "Message-ID is invalid" + mid = imid + subject or raise "Subject not found" + date or raise "Date not found" + date = Time.rfc2822(date) + + # TODO: Implement saner threading algorithm + tid = nil + parents.each do |pmid| + # Strip < and > + /\A<(?.+@.+)>\z/ =~ pmid and pmid.ascii_only? or next + pmid = ipmid + item = @gdbm["m#{pmid}"] or next + tid, = item.split("\0") + break + end + + [mid, tid, subject, date] + end + + def mangle_mid(mid) + mid.tr("/", "_") + end +end diff --git a/lib/puke/server.rb b/lib/puke/server.rb new file mode 100644 index 0000000..29d4479 --- /dev/null +++ b/lib/puke/server.rb @@ -0,0 +1,125 @@ +require_relative "core" +require "webrick" +require "erb" + +class Puke::Server + THREADS_PER_PAGE = 20 + + def initialize + @access_log = File.open(File.join(Puke.datadir, "access.log"), "a") + @access_log.sync = true + al = [[@access_log, WEBrick::AccessLog::COMMON_LOG_FORMAT]] + @server = WEBrick::HTTPServer.new(:BindAddress => Puke.listen_address, + :Port => Puke.listen_port, + :AccessLog => al) + @server.mount_proc("/", method(:root)) + @server.mount_proc("/mid", method(:mid)) + @server.mount_proc("/new", method(:new)) + + @root_template = ERB.new(ROOT_TEMPLATE) + end + + def root(req, res) + case req.path_info + when "/" + page = req.query.fetch("page", "1").to_i + last_tid = Puke.last_tid + + if last_tid >= 0 + b = (last_tid-page*THREADS_PER_PAGE+1).clamp(0, last_tid) + e = (last_tid-(page-1)*THREADS_PER_PAGE).clamp(0, last_tid) + threads = e.downto(b) + .map { |tid| 
Puke.thread(tid) } + .map { |mids| mids.map { |mid| Puke.metadata(mid) } } + else + threads = [] + end + + res.content_type = "text/html; charset=UTF-8" + res.body = @root_template.result(binding) + when "/robots.txt" + # I don't want search engines to index puke + res.content_type = "text/plain" + res.body = <.+@.+?)(?:\/(?\w+)\.mbox)?\z/ =~ req.path_info and + tid = Puke.mid?(mid) or (res.status = 404 and return) + case unya + when "thread" + # TODO: Concatenated-IO object?(?) + res.body = Puke.thread(tid).inject("") do |s, imid| + s << Puke.read(imid) + end + when nil, "raw" + res.body = Puke.read(mid) + else + res.status = 400 + return + end + res.content_type = "text/plain" + end + + # FIXME: How about large messages? + def new(req, res) + Puke.secret and + req.path_info == "/#{Puke.secret}" or (res.status = 404 and return) + + # Parse query string ourselves because req.query does not work for POST + # requests as for GET requests. + query = WEBrick::HTTPUtils.parse_query(req.query_string) + if format = query["format"] + is_raw = format == "raw" + else + is_raw = req.body[0, 5] != "From " + end + mids = Puke.create(StringIO.new(req.body), is_raw) + res.content_type = "text/plain" + res.body = "#{mids}\n" + end + + def start + @server.start + end + + def shutdown + @server.shutdown + @access_log.close + end + + private + + include ERB::Util + + def fdate(time) + time.strftime("%Y-%m-%d %H:%M:%S %:z") + end + + ROOT_TEMPLATE = <<'EOF' + + +random texts (page: <%=page%>) + + +

random texts (page: <%=page%>)

+
+<%threads.each do |ms| root, *child = ms%>
+--<%=h fdate root[:date]%> <%=h root[:subject]%> [thread.mbox]<%child.each do |m|%>
+ `<%=h fdate m[:date]%> <%=h m[:subject]%><%end%>
+<%end%> +
+page: <%if page > 1%><< prev<%end%> +<%if threads.last&.dig(0, :tid)&.> 0%>next >><%end%> + + +EOF +end diff --git a/puke b/puke new file mode 100755 index 0000000..959774f --- /dev/null +++ b/puke @@ -0,0 +1,30 @@ +#!/usr/bin/env ruby +require_relative "lib/puke" +require "optparse" + +opts = {} +OptionParser.new { |o| + o.on("-v", "--verbose") { opts[:verbose] = true } + o.on("-d", "--datadir DATADIR") { |v| opts[:datadir] = v } + o.on("-l", "--listen ADDRESS:PORT") { |v| opts[:listen] = v } + o.on("-s", "--secret SECRET") { |v| opts[:secret] = v } +}.parse!(ARGV) +ARGV.empty? or raise "unknown argument(s): %p" % ARGV + +opts[:datadir] or raise "-d (--datadir) option is required" +opts[:listen] or raise "-l (--listen) option is required" + +# Setup Puke and verify the options +Puke.setup(**opts) +server = Puke::Server.new + +# Set exiting handler +handler = proc { + server.shutdown + Puke.close +} +trap("INT", handler) +trap("TERM", handler) + +# Start the WEBrick server +server.start diff --git a/puke-import b/puke-import new file mode 100755 index 0000000..ed362df --- /dev/null +++ b/puke-import @@ -0,0 +1,28 @@ +#!/usr/bin/env ruby +require "uri" +require "net/http" + +# usage: puke-import URI +# or: cat mboxrd | puke-import URI +# +# URI is the url for importing an message. +# E.g. 'http://localhost:8080/new/secret-string' +# +# TODO: mboxrd containing multiple emails + +uri = URI.parse(ARGV.shift) +http = Net::HTTP.new(uri.host, uri.port) +http.use_ssl = uri.scheme == "https" +http.start { + ARGF.binmode + while true + io = ARGF.to_io + name = File === io ? io.path : io.inspect + print "Importing #{name}... " + req = http.post(uri.path, io.read, + "Content-Type" => "application/octet-stream") + puts "done; mid=#{req.body}" + ARGF.skip + break if ARGF.to_io == io + end +} -- cgit v1.2.3