require "gdbm"
require "fileutils"
require "tempfile"
require "time"

# Message store: every message is kept as an mboxrd file inside the data
# directory, while a GDBM database in the same directory holds the index.
#
# GDBM keys written by this module:
#   "last-tid"  => largest thread id allocated so far
#   "t<tid>"    => "\0"-joined Message-IDs belonging to thread <tid>
#   "m<mid>"    => "<tid>\0<subject>\0<RFC 2822 date>" for message <mid>
module Puke
  # Matches a Message-ID wrapped in angle brackets; the named group is the
  # id without the brackets.
  MESSAGE_ID_RE = /\A<(?<id>.+@.+)>\z/

  class << self
    attr_reader :datadir, :listen_address, :listen_port, :secret

    # Returns the verbose flag given to Puke.setup.
    def verbose?
      @verbose
    end
  end

  module_function

  # Stores the configuration, creates the data and tmp directories and opens
  # (creating it if needed) the GDBM index.
  #
  # verbose - verbosity flag exposed through Puke.verbose?
  # datadir - directory holding the .mbox files, tmp/ and puke.gdbm
  # listen  - "address:port" string; the port part must parse as an Integer
  # secret  - opaque value exposed through Puke.secret
  def setup(verbose: false, datadir:, listen:, secret: nil)
    @verbose = verbose
    @datadir = datadir
    address, port = listen.split(":")
    @listen_address = address
    @listen_port = Integer(port)
    @secret = secret

    FileUtils.mkdir_p(@datadir)
    FileUtils.mkdir_p(File.join(@datadir, "tmp"))
    gpath = File.join(@datadir, "puke.gdbm")
    @gdbm = GDBM.new(gpath, 0644, GDBM::WRCREAT)
  end

  # Closes the GDBM index and replaces the handle with the marker :closed.
  def close
    @gdbm.close
    @gdbm = :closed
  end

  # Directory used for temporary files while importing messages.
  def tmpdir
    File.join(datadir, "tmp")
  end

  # Returns the largest tid ever, or -1 if no messages are stored
  def last_tid
    @gdbm.fetch("last-tid", "-1").to_i
  end

  # Returns the Message-IDs stored for thread +tid+ as an array of strings.
  # The lookup uses GDBM#fetch without a default, so an unknown tid raises.
  def thread(tid)
    @gdbm.fetch("t#{tid}").split("\0")
  end

  # Returns the thread id of message +mid+, or nil when it is not indexed.
  def tid_for(mid)
    Puke.metadata(mid)&.fetch(:tid)
  end

  # Returns { mid:, tid:, subject:, date: } for message +mid+ (tid as an
  # Integer, date as a Time), or nil when the message is not indexed.
  def metadata(mid)
    record = @gdbm["m#{mid}"]
    return unless record

    tid, subject, date = record.split("\0")
    { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) }
  end

  # Opens the stored .mbox file of message +mid+; the block, if any, is
  # handed to File.open.
  def open(mid, &blk)
    File.open(File.join(datadir, mangle_mid(mid) + ".mbox"), &blk)
  end

  CreateMutex = Thread::Mutex.new

  # Indexes the single message contained in +file+ and hard-links the file
  # into the data directory. Returns the Message-ID (without angle brackets).
  #
  # Raises RuntimeError when the message, or another message with the same
  # Message-ID / mangled file name, is already stored; errors raised by
  # parse_file propagate unchanged.
  def create0(file)
    mid, tid, subject, date = parse_file(file)
    # parse_file hands back the tid exactly as stored in GDBM (a String);
    # metadata exposes it as an Integer, so normalise before comparing.
    tid &&= tid.to_i
    fpath = File.join(datadir, mangle_mid(mid) + ".mbox")

    # A mutex is used here in case two threads try to store the same message
    CreateMutex.synchronize do
      existing = Puke.metadata(mid)
      # File.exist?(fpath) is needed here because of the mangling...
      if existing || File.exist?(fpath)
        if existing && (!tid || tid == existing[:tid]) &&
           subject == existing[:subject] && date == existing[:date]
          raise "duplicate message"
        elsif existing
          # TODO: Oops, how to deal with this case?
          raise "duplicate Message-ID; subject=#{existing[:subject]} and " \
                "subject=#{subject}"
        else
          # No metadata for this Message-ID, yet the target file exists:
          # a different Message-ID mangles to the same file name.
          raise "duplicate Message-ID; #{fpath} already exists " \
                "(subject=#{subject})"
        end
      end

      if tid
        # TODO: Hmph
        entries = Puke.thread(tid).map { |m| Puke.metadata(m) }
        entries << { mid: mid, date: date, subject: subject }
        entries.sort_by! { |entry| [entry[:date], entry[:subject]] }
        @gdbm["t#{tid}"] = entries.map { |entry| entry[:mid] }.join("\0")
      else
        # No known parent: start a new thread.
        tid = last_tid + 1
        @gdbm["last-tid"] = tid.to_s
        @gdbm["t#{tid}"] = mid
      end
      @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0")
      File.link(file, fpath)
    end
    mid
  end

  # Imports messages read from +io+.
  #
  # With is_raw true, +io+ is one raw message: it is converted to mboxrd
  # ("From " lines quoted, CRLF line endings turned into LF) and the single
  # Message-ID is returned. Otherwise +io+ is an mboxrd stream that may hold
  # several messages, and an array of Message-IDs is returned.
  #
  # Raises RuntimeError ("invalid mbox header") when an mboxrd stream does
  # not start with a "From " line; errors from create0 propagate.
  def create(io, is_raw = false)
    if is_raw
      Tempfile.create("unnamed", tmpdir) do |t|
        t.binmode
        # Is this really correct??
        t << "From mboxrd Thu Jan 1 00:00:00 1970\n"
        terminated = true
        io.each_line do |line|
          terminated = line[-1] == "\n"
          line.gsub!(/\A(>*)From /, "\\1>From ")
          line.gsub!(/\r\n\z/, "\n")
          t << line
        end
        # Finish the message with exactly one blank line: an unterminated
        # last line first needs its own newline.
        t << (terminated ? "\n" : "\n\n")
        t.rewind
        create0(t)
      end
    else
      # io is already in mboxrd format and may contain multiple messages
      mids = []
      t = nil
      begin
        io.each_line do |line|
          # Beginning of a new message
          if line.start_with?("From ")
            if t
              t.rewind
              mids << create0(t)
              t.close!
            end
            t = Tempfile.open("unnamed", tmpdir)
            t.binmode
          end
          raise "invalid mbox header" unless t
          t << line
        end
        if t
          t.rewind
          mids << create0(t)
          t.close!
        end
      ensure
        # Remove the temporary file when an error interrupted the import.
        t.close! if t && !t.closed?
      end
      mids
    end
  end

  # Reads the header section of the message in +io+ and returns
  # [mid, tid, subject, date]:
  #   mid     - Message-ID without angle brackets
  #   tid     - thread id (as stored in GDBM, a String) of the first parent
  #             from In-Reply-To / References that is already indexed,
  #             or nil when there is none
  #   subject - Subject value including folded continuation lines
  #   date    - Time parsed from the Date header
  #
  # Raises RuntimeError when Message-ID, Subject or Date is missing or the
  # Message-ID is malformed or not ASCII; Time.rfc2822 raises on a bad date.
  def parse_file(io)
    # We can assume the input is in mboxrd format
    mid = nil
    parents = []
    subject = nil
    date = nil
    state = nil

    io.each_line do |line|
      # We use \0 as a delimiter so don't allow that
      line = line.delete("\0")

      if line == "\n" || line == "\r\n"
        # End of headers
        break
      elsif line.start_with?(" ") || line.start_with?("\t")
        # Folded continuation of the previous header
        case state
        when nil
          # This is invalid and must not happen...
        when :subject
          subject << line.chomp
        when :references
          parents.concat(line.scan(/(<[^<>]+>)/).flatten)
        end
      else
        h, c = line.split(":", 2)
        c or next # Hmm, invalid header?
        h.downcase!
        c.strip!
        case h
        when "date"
          date = c
          state = nil
        when "message-id"
          next if mid
          mid = c
          state = nil
        when "in-reply-to"
          parents.unshift(c)
          state = nil
        when "references"
          parents.concat(c.scan(/(<[^<>]+>)/).flatten)
          state = :references
        when "subject"
          next if subject
          subject = c
          state = :subject
        else
          state = nil
        end
      end
    end

    mid or raise "Message-ID not found"
    # Strip < and >
    found = MESSAGE_ID_RE.match(mid)
    raise "Message-ID is invalid" unless found && mid.ascii_only?
    mid = found[:id]

    subject or raise "Subject not found"
    date or raise "Date not found"
    date = Time.rfc2822(date)

    # TODO: Implement saner threading algorithm
    tid = nil
    parents.each do |pmid|
      # Strip < and >
      parent = MESSAGE_ID_RE.match(pmid)
      next unless parent && pmid.ascii_only?
      item = @gdbm["m#{parent[:id]}"] or next
      tid, = item.split("\0")
      break
    end

    [mid, tid, subject, date]
  end

  # Turns a Message-ID into a string usable as a file name by replacing
  # every "/" with "_".
  def mangle_mid(mid)
    mid.tr("/", "_")
  end
end