about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kazuki Yamaguchi <k@rhe.jp> 2017-06-11 02:47:44 +0900
committer Kazuki Yamaguchi <k@rhe.jp> 2017-06-11 02:47:44 +0900
commit 7081fd664273894c1325a024f52e6450697d3646 (patch)
tree 8da8a0012d89e92d896844615e629d5e5df96ec2
parent 9c1fff85f1a01bbd30429c0be20aaa9f60ca90ca (diff)
download puke-7081fd664273894c1325a024f52e6450697d3646.tar.gz
Improve thread detection logic
-rw-r--r-- lib/puke/core.rb 104
-rw-r--r-- lib/puke/server.rb 8
2 files changed, 62 insertions, 50 deletions
diff --git a/lib/puke/core.rb b/lib/puke/core.rb
index 23fb429..b4c2015 100644
--- a/lib/puke/core.rb
+++ b/lib/puke/core.rb
@@ -44,13 +44,16 @@ module Puke
end
def tid_for(mid)
- Puke.metadata(mid)&.fetch(:tid)
+ m = Puke.metadata(mid) and m[:subject] and m[:tid]
end
+ # Note that the value for key :subject may be nil if we don't have the
+ # actual message
def metadata(mid)
data = @gdbm["m#{mid}"] or return
- tid, subject, date = data.split("\0")
- { mid: mid, tid: tid.to_i, subject: subject, date: Time.rfc2822(date) }
+ tid, date, subject = data.split("\0")
+ date = Time.rfc2822(date)
+ { mid: mid, tid: tid.to_i, date: date, subject: subject }
end
def open(mid, &blk)
@@ -59,36 +62,47 @@ module Puke
CreateMutex = Thread::Mutex.new
def create0(file)
- mid, tid, subject, date = parse_file(file)
- fpath = File.join(datadir, mangle_mid(mid) + ".mbox")
+ mid, subject, date, references, in_reply_to = parse_file(file)
- # A mutex is used here in case two threads try to store the same message
CreateMutex.synchronize {
- # File.exist?(fpath) is needed here because of the mangling...
- if existing = Puke.metadata(mid) or File.exist?(fpath)
- if existing && (!tid || tid == existing[:tid]) &&
- subject == existing[:subject] && date == existing[:date]
- raise "duplicate message"
- else
- # TODO: Oops, how to deal with this case?
- raise "duplicate Message-ID; subject=#{existing[:subject]} and " \
- "subject=#{subject}"
- end
+ if existing = Puke.metadata(mid) and existing[:subject]
+ # TODO: Found a non-dummy existing entry. How to deal with this case?
+ raise "duplicate Message-ID; existing=#{existing[:subject]} and " \
+ "new=#{subject}"
end
- unless tid
- tid = last_tid + 1
- @gdbm["last-tid"] = tid.to_s
- @gdbm["t#{tid}"] = mid
+ tid = existing[:tid] if existing
+ tid = Puke.metadata(in_reply_to)&.fetch(:tid) if !tid && in_reply_to
+ references.each { |i|
+ tid = Puke.metadata(i)&.fetch(:tid) and break
+ } if !tid
+ if tid
+ # Yucks
+ new_mids = Puke.thread(tid)
+ .reject { |m| m == mid }
+ .map { |m| Puke.metadata(m).values_at(:date, :subject, :mid) }
+ .push([date, subject, mid])
+ .sort
+ .map { |i| i.last }
else
- # TODO: Hmph
- l = Puke.thread(tid).map { |m| Puke.metadata(m) } << {
- mid: mid, date: date, subject: subject }
- l.sort_by! { |i| [i[:date], i[:subject]] }
- @gdbm["t#{tid}"] = l.map { |i| i[:mid] }.join("\0")
+ tid = Puke.last_tid + 1
+ @gdbm["last-tid"] = tid.to_s
+ new_mids = [mid]
end
- @gdbm["m#{mid}"] = [tid, subject, date.rfc2822].join("\0")
- File.link(file, fpath)
+
+ [references.first, in_reply_to].compact.each do |i|
+ next if new_mids.include?(i)
+ if Puke.metadata(i)
+ # Hmm, the possible-parent message belongs to another thread.
+ # TODO: Do we want to unite them?
+ else
+ new_mids.unshift(i)
+ @gdbm["m#{i}"] = [tid, Time.utc(0).rfc2822].join("\0")
+ end
+ end
+ @gdbm["t#{tid}"] = new_mids.join("\0")
+ @gdbm["m#{mid}"] = [tid, date.rfc2822, subject].join("\0")
+ File.link(file, File.join(datadir, mangle_mid(mid) + ".mbox"))
}
mid
end
@@ -148,7 +162,8 @@ module Puke
def parse_file(io)
# We can assume the input is in mboxrd format
mid = nil
- parents = []
+ references = []
+ in_reply_to = nil
subject = nil
date = nil
@@ -167,7 +182,7 @@ module Puke
when :subject
subject << line.chomp
when :references
- parents.concat(line.scan(/(<[^<>]+>)/).flatten)
+ references.concat(line.scan(/(<[^<>]+>)/).flatten)
end
else
h, c = line.split(":", 2)
@@ -184,10 +199,11 @@ module Puke
mid = c
state = nil
when "in-reply-to"
- parents.unshift(c)
+ next if in_reply_to
+ in_reply_to = c
state = nil
when "references"
- parents.concat(c.scan(/(<[^<>]+>)/).flatten)
+ references.concat(c.scan(/(<[^<>]+>)/).flatten)
state = :references
when "subject"
next if subject
@@ -200,29 +216,23 @@ module Puke
end
mid or raise "Message-ID not found"
- # Strip < and >
- /\A<(?<imid>.+@.+)>\z/ =~ mid and mid.ascii_only? or
- raise "Message-ID is invalid"
- mid = imid
+ mid = strip_angle_bracket(mid) or raise "Message-ID is invalid"
subject or raise "Subject not found"
date or raise "Date not found"
date = Time.rfc2822(date)
+ # Ignore errors
+ in_reply_to and in_reply_to = strip_angle_bracket(in_reply_to)
+ references.map! { |t| strip_angle_bracket(t) }.compact!
- # TODO: Implement saner threading algorithm
- tid = nil
- parents.each do |pmid|
- # Strip < and >
- /\A<(?<ipmid>.+@.+)>\z/ =~ pmid and pmid.ascii_only? or next
- pmid = ipmid
- item = @gdbm["m#{pmid}"] or next
- tid, = item.split("\0")
- break
- end
+ [mid, subject, date, references, in_reply_to]
+ end
- [mid, tid, subject, date]
+ def strip_angle_bracket(t)
+ /\A<(?<i>.+@.+)>\z/ =~ t and i.ascii_only? or return
+ i
end
def mangle_mid(mid)
- mid.tr("/", "_")
+ mid.gsub(/[\/_]/, { "/" => "_", "_" => "__" })
end
end
diff --git a/lib/puke/server.rb b/lib/puke/server.rb
index 719b19d..9808c5a 100644
--- a/lib/puke/server.rb
+++ b/lib/puke/server.rb
@@ -29,9 +29,11 @@ class Puke::Server
if last_tid >= 0
b = (last_tid-page*THREADS_PER_PAGE+1).clamp(0, last_tid)
e = (last_tid-(page-1)*THREADS_PER_PAGE).clamp(0, last_tid)
- threads = e.downto(b)
- .map { |tid| Puke.thread(tid) }
- .map { |mids| mids.map { |mid| Puke.metadata(mid) } }
+ threads = e.downto(b).map { |tid|
+ Puke.thread(tid)
+ .map { |mid| Puke.metadata(mid) }
+ .select { |m| m[:subject] } # Non-dummy messages
+ }
else
threads = []
end