puke

author: Kazuki Yamaguchi <k@rhe.jp> 2017-06-07 21:22:41 +0900
committer: Kazuki Yamaguchi <k@rhe.jp> 2017-06-08 12:47:47 +0900
commit: 4d86351b6799db4338561ec84c5531ba14ce64b2 (patch)
tree: 148136a0496fb6cf09ca039097b7ce9551370fb5
download: puke-4d86351b6799db4338561ec84c5531ba14ce64b2.tar.gz
9 files changed, 422 insertions, 0 deletions
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..b74031e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.rb diff=ruby
+/puke* diff=ruby
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ceeb05b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/tmp
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..17cceaf
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,18 @@
+Copyright (c) 2017 Kazuki Yamaguchi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5eb671e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+puke: random text board
+=======================
+
+puke is a textboard system specialized for posting patches.
+
+A running example is here: https://s.poepoe.org
+
+Usage
+-----
+
+Ruby 2.4 or later is required. No other third-party libraries are required.
+
+Set up the web server:
+
+    $ ./puke -d DATADIR -l ADDRESS:PORT -s SECRET
+
+and post a message with your favorite HTTP client:
+
+    $ cat random.mboxrd | curl --data-binary @- -H 'Content-Type: text/plain' http://ADDRESS:PORT/new/SECRET
+
+
+License
+-------
+
+puke is available as free software under the terms of the MIT License.
+See the file LICENSE for details.
diff --git a/lib/puke.rb b/lib/puke.rb
new file mode 100644
index 0000000..a77582b
--- /dev/null
+++ b/lib/puke.rb
@@ -0,0 +1,2 @@
+require_relative "puke/core"
+require_relative "puke/server"
diff --git a/lib/puke/core.rb b/lib/puke/core.rb
new file mode 100644
index 0000000..c37b17a
--- /dev/null
+++ b/lib/puke/core.rb
@@ -0,0 +1,190 @@
+require "gdbm"
+require "fileutils"
+require "tempfile"
+require "time"
+
+module Puke
+  class << self
+    attr_reader :datadir, :listen_address, :listen_port, :secret
+    def verbose?() @verbose end
+  end
+
+  module_function
+
+  def setup(verbose: false, datadir:, listen:, secret: nil)
+    @verbose = verbose
+    @datadir = datadir
+    l = listen.split(":")
+    @listen_address = l[0]
+    @listen_port = Integer(l[1])
+    @secret = secret
+
+    FileUtils.mkdir_p(@datadir)
+    FileUtils.mkdir_p(File.join(@datadir, "tmp"))
+    gpath = File.join(@datadir, "puke.gdbm")
+    @gdbm = GDBM.new(gpath, 0644, GDBM::WRCREAT)
+  end
+
+  def close
+    @gdbm.close
+    @gdbm = :closed
+  end
+
+  def tmpdir
+    File.join(datadir, "tmp")
+  end
+
+  # Returns the largest tid ever, or -1 if no messages are stored
+  def last_tid
+    @gdbm.fetch("last-tid", "-1").to_i
+  end
+
+  def thread(tid)
+    @gdbm.fetch("t#{tid}").split("\0")
+  end
+
+  def mid?(mid)
+    @gdbm["m#{mid}"]&.split("\0")&.first
+  end
+
+  def metadata(mid)
+    tid, subject, date = @gdbm["m#{mid}"].split("\0")
+    { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) }
+  end
+
+  def read(mid)
+    File.read(File.join(datadir, mangle_mid(mid) + ".mbox"))
+  end
+
+  CreateMutex = Thread::Mutex.new
+  def create(io, is_raw = false)
+    t = Tempfile.open("unnamed", tmpdir)
+    if is_raw
+      # Is this really correct??
+      t << "From mboxrd Thu Jan  1 00:00:00 1970\n"
+      io.each_line do |line|
+        t << line.gsub(/\A(>*)From /, "\\1>From ").gsub(/\r\n\z/, "\n")
+      end
+      t << "\n"
+    else
+      IO.copy_stream(io, t)
+    end
+    t.rewind
+    mid, tid, subject, date = parse_file(t)
+    fpath = File.join(datadir, mangle_mid(mid) + ".mbox")
+
+    # A mutex is used here in case two threads try to store the same message
+    CreateMutex.synchronize {
+      # File.exist?(fpath) is needed here because of the mangling...
+      if Puke.mid?(mid) or File.exist?(fpath)
+        existing = Puke.metadata(mid)
+        if (!tid || tid == existing[:tid]) && subject == existing[:subject] &&
+            date == existing[:date]
+          raise "duplicate message"
+        else
+          # TODO: Oops, how to deal with this case?
+          raise "duplicate Message-ID; subject=#{existing[:subject]} and " \
+            "subject=#{subject}"
+        end
+      end
+
+      unless tid
+        tid = last_tid + 1
+        @gdbm["last-tid"] = tid.to_s
+        @gdbm["t#{tid}"] = mid
+      else
+        # TODO: Hmph
+        l = Puke.thread(tid).map { |m| Puke.metadata(m) } << {
+          mid: mid, date: date, subject: subject }
+        l.sort_by! { |i| [i[:date], i[:subject]] }
+        @gdbm["t#{tid}"] = l.map { |i| i[:mid] }.join("\0")
+      end
+      @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0")
+      File.link(t, fpath)
+    }
+    mid
+  ensure
+    t and t.close!
+  end
+
+  def parse_file(io)
+    # We can assume the input is in mboxrd format
+    mid = nil
+    parents = []
+    subject = nil
+    date = nil
+
+    state = nil
+    io.each_line do |line|
+      # We use \0 as a delimiter so don't allow that
+      line.delete("\0")
+
+      if line == "\n"
+        # End of headers
+        break
+      elsif line.start_with?(" ") || line.start_with?("\t")
+        case state
+        when nil
+          # This is invalid and must not happen...
+        when :subject
+          subject << line.chomp
+        when :references
+          parents.concat(line.scan(/(<[^<>]+>)/).flatten)
+        end
+      else
+        h, c = line.split(":", 2)
+        h.downcase!
+        c or next # Hmm, invalid header?
+        c.strip!
+
+        case h
+        when "date"
+          date = c
+          state = nil
+        when "message-id"
+          next if mid
+          mid = c
+          state = nil
+        when "in-reply-to"
+          parents.unshift(c)
+          state = nil
+        when "references"
+          parents.concat(c.scan(/(<[^<>]+>)/).flatten)
+          state = :references
+        when "subject"
+          next if subject
+          subject = c
+          state = :subject
+        else
+          state = nil
+        end
+      end
+    end
+
+    mid or raise "Message-ID not found"
+    # Strip < and >
+    /\A<(?<imid>.+@.+)>\z/ =~ mid and mid.ascii_only? or
+      raise "Message-ID is invalid"
+    mid = imid
+    subject or raise "Subject not found"
+    date or raise "Date not found"
+    date = Time.rfc2822(date)
+
+    # TODO: Implement saner threading algorithm
+    tid = nil
+    parents.each do |pmid|
+      # Strip < and >
+      /\A<(?<ipmid>.+@.+)>\z/ =~ pmid and pmid.ascii_only? or next
+      pmid = ipmid
+      item = @gdbm["m#{pmid}"] or next
+      tid, = item.split("\0")
+      break
+    end
+
+    [mid, tid, subject, date]
+  end
+
+  def mangle_mid(mid)
+    mid.tr("/", "_")
+  end
+end
diff --git a/lib/puke/server.rb b/lib/puke/server.rb
new file mode 100644
index 0000000..29d4479
--- /dev/null
+++ b/lib/puke/server.rb
@@ -0,0 +1,125 @@
+require_relative "core"
+require "webrick"
+require "erb"
+
+class Puke::Server
+  THREADS_PER_PAGE = 20
+
+  def initialize
+    @access_log = File.open(File.join(Puke.datadir, "access.log"), "a")
+    @access_log.sync = true
+    al = [[@access_log, WEBrick::AccessLog::COMMON_LOG_FORMAT]]
+    @server = WEBrick::HTTPServer.new(:BindAddress => Puke.listen_address,
+                                      :Port => Puke.listen_port,
+                                      :AccessLog => al)
+    @server.mount_proc("/", method(:root))
+    @server.mount_proc("/mid", method(:mid))
+    @server.mount_proc("/new", method(:new))
+
+    @root_template = ERB.new(ROOT_TEMPLATE)
+  end
+
+  def root(req, res)
+    case req.path_info
+    when "/"
+      page = req.query.fetch("page", "1").to_i
+      last_tid = Puke.last_tid
+
+      if last_tid >= 0
+        b = (last_tid-page*THREADS_PER_PAGE+1).clamp(0, last_tid)
+        e = (last_tid-(page-1)*THREADS_PER_PAGE).clamp(0, last_tid)
+        threads = e.downto(b)
+          .map { |tid| Puke.thread(tid) }
+          .map { |mids| mids.map { |mid| Puke.metadata(mid) } }
+      else
+        threads = []
+      end
+
+      res.content_type = "text/html; charset=UTF-8"
+      res.body = @root_template.result(binding)
+    when "/robots.txt"
+      # I don't want search engines to index puke
+      res.content_type = "text/plain"
+      res.body = <<EOF
+User-agent: *
+Disallow: /
+EOF
+    else
+      res.status = 404
+    end
+  end
+
+  def mid(req, res)
+    /\A\/(?<mid>.+@.+?)(?:\/(?<unya>\w+)\.mbox)?\z/ =~ req.path_info and
+      tid = Puke.mid?(mid) or (res.status = 404 and return)
+    case unya
+    when "thread"
+      # TODO: Concatenated-IO object?(?)
+      res.body = Puke.thread(tid).inject("") do |s, imid|
+        s << Puke.read(imid)
+      end
+    when nil, "raw"
+      res.body = Puke.read(mid)
+    else
+      res.status = 400
+      return
+    end
+    res.content_type = "text/plain"
+  end
+
+  # FIXME: How about large messages?
+  def new(req, res)
+    Puke.secret and
+      req.path_info == "/#{Puke.secret}" or (res.status = 404 and return)
+
+    # Parse query string ourselves because req.query does not work for POST
+    # requests as for GET requests.
+    query = WEBrick::HTTPUtils.parse_query(req.query_string)
+    if format = query["format"]
+      is_raw = format == "raw"
+    else
+      is_raw = req.body[0, 5] != "From "
+    end
+    mids = Puke.create(StringIO.new(req.body), is_raw)
+    res.content_type = "text/plain"
+    res.body = "#{mids}\n"
+  end
+
+  def start
+    @server.start
+  end
+
+  def shutdown
+    @server.shutdown
+    @access_log.close
+  end
+
+  private
+
+  include ERB::Util
+
+  def fdate(time)
+    time.strftime("%Y-%m-%d %H:%M:%S %:z")
+  end
+
+  ROOT_TEMPLATE = <<'EOF'
+<html>
+<head>
+<title>random texts (page: <%=page%>)</title>
+<style>
+pre { whitespace: pre-wrap; }
+</style>
+</head>
+<body><h1>random texts (page: <%=page%>)</h1>
+<hr>
+<%threads.each do |ms| root, *child = ms%><pre>
+--<%=h fdate root[:date]%> <a href="/mid/<%=h root[:mid]%>"><%=h root[:subject]%></a> <a href="/mid/<%=h root[:mid]%>/thread.mbox">[thread.mbox]</a><%child.each do |m|%>
+ `<%=h fdate m[:date]%> <a href="/mid/<%=h m[:mid]%>"><%=h m[:subject]%></a><%end%></pre>
+<%end%>
+<hr>
+page: <%if page > 1%><a href="/?page=<%=page - 1%>">&lt;&lt; prev</a><%end%>
+<%if threads.last&.dig(0, :tid)&.> 0%><a href="/?page=<%=page + 1%>">next &gt;&gt;</a><%end%>
+</body>
+</html>
+EOF
+end
diff --git a/puke b/puke
new file mode 100755
index 0000000..959774f
--- /dev/null
+++ b/puke
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+require_relative "lib/puke"
+require "optparse"
+
+opts = {}
+OptionParser.new { |o|
+  o.on("-v", "--verbose") { opts[:verbose] = true }
+  o.on("-d", "--datadir DATADIR") { |v| opts[:datadir] = v }
+  o.on("-l", "--listen ADDRESS:PORT") { |v| opts[:listen] = v }
+  o.on("-s", "--secret SECRET") { |v| opts[:secret] = v }
+}.parse!(ARGV)
+ARGV.empty? or raise "unknown argument(s): %p" % ARGV
+
+opts[:datadir] or raise "-d (--datadir) option is required"
+opts[:listen] or raise "-l (--listen) option is required"
+
+# Setup Puke and verify the options
+Puke.setup(**opts)
+server = Puke::Server.new
+
+# Set exiting handler
+handler = proc {
+  server.shutdown
+  Puke.close
+}
+trap("INT", handler)
+trap("TERM", handler)
+
+# Start the WEBrick server
+server.start
diff --git a/puke-import b/puke-import
new file mode 100755
index 0000000..ed362df
--- /dev/null
+++ b/puke-import
@@ -0,0 +1,28 @@
+#!/usr/bin/env ruby
+require "uri"
+require "net/http"
+
+# usage: puke-import URI <mboxrd files...>
+#    or: cat mboxrd | puke-import URI
+#
+# URI is the url for importing an message.
+# E.g. 'http://localhost:8080/new/secret-string'
+#
+# TODO: mboxrd containing multiple emails
+
+uri = URI.parse(ARGV.shift)
+http = Net::HTTP.new(uri.host, uri.port)
+http.use_ssl = uri.scheme == "https"
+http.start {
+  ARGF.binmode
+  while true
+    io = ARGF.to_io
+    name = File === io ? io.path : io.inspect
+    print "Importing #{name}... "
+    req = http.post(uri.path, io.read,
+                    "Content-Type" => "application/octet-stream")
+    puts "done; mid=#{req.body}"
+    ARGF.skip
+    break if ARGF.to_io == io
+  end
+}
author	Kazuki Yamaguchi <k@rhe.jp>	2017-06-07 21:22:41 +0900
committer	Kazuki Yamaguchi <k@rhe.jp>	2017-06-08 12:47:47 +0900
commit	4d86351b6799db4338561ec84c5531ba14ce64b2 (patch)
tree	148136a0496fb6cf09ca039097b7ce9551370fb5
download	puke-4d86351b6799db4338561ec84c5531ba14ce64b2.tar.gz