diff options
Diffstat (limited to 'lib/puke/core.rb')
-rw-r--r-- | lib/puke/core.rb | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/lib/puke/core.rb b/lib/puke/core.rb new file mode 100644 index 0000000..c37b17a --- /dev/null +++ b/lib/puke/core.rb @@ -0,0 +1,190 @@ +require "gdbm" +require "fileutils" +require "tempfile" +require "time" + +module Puke + class << self + attr_reader :datadir, :listen_address, :listen_port, :secret + def verbose?() @verbose end + end + + module_function + + def setup(verbose: false, datadir:, listen:, secret: nil) + @verbose = verbose + @datadir = datadir + l = listen.split(":") + @listen_address = l[0] + @listen_port = Integer(l[1]) + @secret = secret + + FileUtils.mkdir_p(@datadir) + FileUtils.mkdir_p(File.join(@datadir, "tmp")) + gpath = File.join(@datadir, "puke.gdbm") + @gdbm = GDBM.new(gpath, 0644, GDBM::WRCREAT) + end + + def close + @gdbm.close + @gdbm = :closed + end + + def tmpdir + File.join(datadir, "tmp") + end + + # Returns the largest tid ever, or -1 if no messages are stored + def last_tid + @gdbm.fetch("last-tid", "-1").to_i + end + + def thread(tid) + @gdbm.fetch("t#{tid}").split("\0") + end + + def mid?(mid) + @gdbm["m#{mid}"]&.split("\0")&.first + end + + def metadata(mid) + tid, subject, date = @gdbm["m#{mid}"].split("\0") + { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) } + end + + def read(mid) + File.read(File.join(datadir, mangle_mid(mid) + ".mbox")) + end + + CreateMutex = Thread::Mutex.new + def create(io, is_raw = false) + t = Tempfile.open("unnamed", tmpdir) + if is_raw + # Is this really correct?? + t << "From mboxrd Thu Jan 1 00:00:00 1970\n" + io.each_line do |line| + t << line.gsub(/\A(>*)From /, "\\1>From ").gsub(/\r\n\z/, "\n") + end + t << "\n" + else + IO.copy_stream(io, t) + end + t.rewind + mid, tid, subject, date = parse_file(t) + fpath = File.join(datadir, mangle_mid(mid) + ".mbox") + + # A mutex is used here in case two threads try to store the same message + CreateMutex.synchronize { + # File.exist?(fpath) is needed here because of the mangling... + if Puke.mid?(mid) or File.exist?(fpath) + existing = Puke.metadata(mid) + if (!tid || tid == existing[:tid]) && subject == existing[:subject] && + date == existing[:date] + raise "duplicate message" + else + # TODO: Oops, how to deal with this case? + raise "duplicate Message-ID; subject=#{existing[:subject]} and " \ + "subject=#{subject}" + end + end + + unless tid + tid = last_tid + 1 + @gdbm["last-tid"] = tid.to_s + @gdbm["t#{tid}"] = mid + else + # TODO: Hmph + l = Puke.thread(tid).map { |m| Puke.metadata(m) } << { + mid: mid, date: date, subject: subject } + l.sort_by! { |i| [i[:date], i[:subject]] } + @gdbm["t#{tid}"] = l.map { |i| i[:mid] }.join("\0") + end + @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0") + File.link(t, fpath) + } + mid + ensure + t and t.close! + end + + def parse_file(io) + # We can assume the input is in mboxrd format + mid = nil + parents = [] + subject = nil + date = nil + + state = nil + io.each_line do |line| + # We use \0 as a delimiter so don't allow that + line.delete("\0") + + if line == "\n" + # End of headers + break + elsif line.start_with?(" ") || line.start_with?("\t") + case state + when nil + # This is invalid and must not happen... + when :subject + subject << line.chomp + when :references + parents.concat(line.scan(/(<[^<>]+>)/).flatten) + end + else + h, c = line.split(":", 2) + h.downcase! + c or next # Hmm, invalid header? + c.strip! + + case h + when "date" + date = c + state = nil + when "message-id" + next if mid + mid = c + state = nil + when "in-reply-to" + parents.unshift(c) + state = nil + when "references" + parents.concat(c.scan(/(<[^<>]+>)/).flatten) + state = :references + when "subject" + next if subject + subject = c + state = :subject + else + state = nil + end + end + end + + mid or raise "Message-ID not found" + # Strip < and > + /\A<(?<imid>.+@.+)>\z/ =~ mid and mid.ascii_only? or + raise "Message-ID is invalid" + mid = imid + subject or raise "Subject not found" + date or raise "Date not found" + date = Time.rfc2822(date) + + # TODO: Implement saner threading algorithm + tid = nil + parents.each do |pmid| + # Strip < and > + /\A<(?<ipmid>.+@.+)>\z/ =~ pmid and pmid.ascii_only? or next + pmid = ipmid + item = @gdbm["m#{pmid}"] or next + tid, = item.split("\0") + break + end + + [mid, tid, subject, date] + end + + def mangle_mid(mid) + mid.tr("/", "_") + end +end |