{-# LANGUAGE ViewPatterns #-}

-----------------------------------------------------------------------------
{- |
Module      :  Data.MBox
Copyright   :  (c) Gershom Bazerman, 2009; ported to Text by Alexander Jerneck, 2012
License     :  BSD 3 Clause
Maintainer  :  gershomb@gmail.com
Stability   :  experimental

Reads and writes mboxrd files as per <http://www.qmail.org/man/man5/mbox.html>.

This module uses Lazy Text pervasively, and should be able to operate as a streaming parser. That is to say, given a lazy stream of Text, and a streaming processing function, you should be able to analyze large mbox files in constant space.

-}
-------------------------------------------------------------------------

module Data.MBox (MBox, Message(..), Header, parseMBox, parseForward, parseDateHeader, showMessage, showMBox, getHeader, isID, isDate) where

import Prelude hiding (tail, init, last, minimum, maximum, foldr1, foldl1, (!!), read)
import Control.Arrow
import Data.Char
import Data.Maybe
import Data.Time
import Safe
import qualified Data.Text.Lazy as T
import qualified Data.Time.Locale.Compat as LC

type MBox = [Message]
data Message = Message {Message -> Text
fromLine :: T.Text, Message -> [Header]
headers :: [Header], Message -> Text
body :: T.Text} deriving (ReadPrec [Message]
ReadPrec Message
Int -> ReadS Message
ReadS [Message]
(Int -> ReadS Message)
-> ReadS [Message]
-> ReadPrec Message
-> ReadPrec [Message]
-> Read Message
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Message]
$creadListPrec :: ReadPrec [Message]
readPrec :: ReadPrec Message
$creadPrec :: ReadPrec Message
readList :: ReadS [Message]
$creadList :: ReadS [Message]
readsPrec :: Int -> ReadS Message
$creadsPrec :: Int -> ReadS Message
Read, Int -> Message -> ShowS
[Message] -> ShowS
Message -> String
(Int -> Message -> ShowS)
-> (Message -> String) -> ([Message] -> ShowS) -> Show Message
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Message] -> ShowS
$cshowList :: [Message] -> ShowS
show :: Message -> String
$cshow :: Message -> String
showsPrec :: Int -> Message -> ShowS
$cshowsPrec :: Int -> Message -> ShowS
Show)
type Header = (T.Text, T.Text)

-- | Reads a date header as a UTCTime
parseDateHeader :: T.Text -> Maybe UTCTime
parseDateHeader :: Text -> Maybe UTCTime
parseDateHeader Text
txt = [UTCTime] -> Maybe UTCTime
forall a. [a] -> Maybe a
listToMaybe ([UTCTime] -> Maybe UTCTime)
-> ([Maybe UTCTime] -> [UTCTime])
-> [Maybe UTCTime]
-> Maybe UTCTime
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe UTCTime] -> [UTCTime]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe UTCTime] -> Maybe UTCTime)
-> [Maybe UTCTime] -> Maybe UTCTime
forall a b. (a -> b) -> a -> b
$ (String -> Maybe UTCTime) -> [String] -> [Maybe UTCTime]
forall a b. (a -> b) -> [a] -> [b]
map String -> Maybe UTCTime
forall {t}. ParseTime t => String -> Maybe t
tryParse [String]
formats where
  header :: String
header = Text -> String
T.unpack Text
txt
  tryParse :: String -> Maybe t
tryParse String
f = TimeLocale -> String -> String -> Maybe t
forall t. ParseTime t => TimeLocale -> String -> String -> Maybe t
parseTime TimeLocale
LC.defaultTimeLocale String
f String
header
  formats :: [String]
formats =
    [ String
"%a, %_d %b %Y %T %z"
    , String
"%a, %_d %b %Y %T %Z"
    , String
"%a, %d %b %Y %T %z"
    , String
"%a, %d %b %Y %T %Z"
    , String
"%a, %_d %b %Y %T %z (%Z)"
    , String
"%a, %_d %b %Y %T %z (GMT%:-z)"
    , String
"%a, %_d %b %Y %T %z (UTC%:-z)"
    , String
"%a, %_d %b %Y %T %z (GMT%:z)"
    , String
"%a, %_d %b %Y %T %z (UTC%:z)"
    , String
"%A, %B %e, %Y %l:%M %p"
    , String
"%e %b %Y %T %z"
    ]

-- | Attempts to retrieve the contents of a forwarded message from an enclosing message.
parseForward :: Message -> Message
parseForward :: Message -> Message
parseForward origMsg :: Message
origMsg@(Message Text
f [Header]
_ Text
b) =
    case Int -> [Text] -> [Text]
forall a. Int -> [a] -> [a]
drop Int
1 ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= String -> Text
T.pack String
"-----Original Message-----") (Text -> [Text]
T.lines Text
b) of
      [] -> Message
origMsg
      [Text]
xs -> Message -> [Message] -> Message
forall a. a -> [a] -> a
headDef Message
origMsg ([Message] -> Message)
-> ([Text] -> [Message]) -> [Text] -> Message
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Message]
parseMBox (Text -> [Message]) -> ([Text] -> Text) -> [Text] -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Message) -> [Text] -> Message
forall a b. (a -> b) -> a -> b
$ Text
fText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
xs

-- | Parses Text as an mbox file.
parseMBox :: T.Text -> MBox
parseMBox :: Text -> [Message]
parseMBox = [Text] -> [Message]
go ([Text] -> [Message]) -> (Text -> [Text]) -> Text -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
    where
      go :: [Text] -> [Message]
go [] = []
      go (Text
x:[Text]
xs) = (Message -> [Message] -> [Message])
-> (Message, [Message]) -> [Message]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (:) ((Message, [Message]) -> [Message])
-> ([Text] -> (Message, [Message])) -> [Text] -> [Message]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> [Text] -> Message
readMsg Text
x ([Text] -> Message)
-> ([Text] -> [Message])
-> ([Text], [Text])
-> (Message, [Message])
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** [Text] -> [Message]
go) (([Text], [Text]) -> (Message, [Message]))
-> ([Text] -> ([Text], [Text])) -> [Text] -> (Message, [Message])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break ((String -> Text
T.pack String
"From ") Text -> Text -> Bool
`T.isPrefixOf`) ([Text] -> [Message]) -> [Text] -> [Message]
forall a b. (a -> b) -> a -> b
$ [Text]
xs
      readMsg :: T.Text -> [T.Text] -> Message
      readMsg :: Text -> [Text] -> Message
readMsg Text
x [Text]
xs = ([Header] -> Text -> Message) -> ([Header], Text) -> Message
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (Text -> [Header] -> Text -> Message
Message Text
x) (([Header], Text) -> Message)
-> ([Text] -> ([Header], Text)) -> [Text] -> Message
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text] -> Text) -> ([Header], [Text]) -> ([Header], Text)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second ([Text] -> Text
T.unlines ([Text] -> Text) -> ([Text] -> [Text]) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
unquoteFrom)(([Header], [Text]) -> ([Header], Text))
-> ([Text] -> ([Header], [Text])) -> [Text] -> ([Header], Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> ([Header], [Text])
readHeaders ([Text] -> Message) -> [Text] -> Message
forall a b. (a -> b) -> a -> b
$ [Text]
xs
      readHeaders :: [T.Text] -> ([Header], [T.Text])
      readHeaders :: [Text] -> ([Header], [Text])
readHeaders [] = ([],[])
      readHeaders (Text
x:[Text]
xs)
          | Text -> Bool
T.null Text
x Bool -> Bool -> Bool
|| (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
x Bool -> Bool -> Bool
|| Bool -> Bool
not ((Char -> Bool) -> Text -> Bool
T.any (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
':') Text
x) = ([],[Text]
xs)
          | Bool
otherwise = ([Header] -> [Header]) -> ([Header], [Text]) -> ([Header], [Text])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (((Text -> Text) -> Header -> Header
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanHeader (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text -> Text
`T.append` Text
headerCont) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> Text -> Text
T.drop Int64
1) (Header -> Header) -> (Text -> Header) -> Text -> Header
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Header
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
':') (Text -> Header) -> Text -> Header
forall a b. (a -> b) -> a -> b
$ Text
x)Header -> [Header] -> [Header]
forall a. a -> [a] -> [a]
:) (([Header], [Text]) -> ([Header], [Text]))
-> ([Header], [Text]) -> ([Header], [Text])
forall a b. (a -> b) -> a -> b
$ [Text] -> ([Header], [Text])
readHeaders [Text]
xs'
          where (Text
headerCont, [Text]
xs') = ([Text] -> Text) -> ([Text], [Text]) -> (Text, [Text])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((String -> Text
T.pack String
" " Text -> Text -> Text
`T.append`) (Text -> Text) -> ([Text] -> Text) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Text) -> ([Text] -> [Text]) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
T.strip) (([Text], [Text]) -> (Text, [Text]))
-> ([Text] -> ([Text], [Text])) -> [Text] -> (Text, [Text])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break Text -> Bool
notCont ([Text] -> (Text, [Text])) -> [Text] -> (Text, [Text])
forall a b. (a -> b) -> a -> b
$ [Text]
xs
                notCont :: T.Text -> Bool
                notCont :: Text -> Bool
notCont Text
s = Text -> Bool
doesNotStartSpace Text
s Bool -> Bool -> Bool
|| Text -> Bool
allSpace Text
s
                allSpace :: Text -> Bool
allSpace = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace
                doesNotStartSpace :: Text -> Bool
doesNotStartSpace Text
s = case Text -> Int64
T.length Text
s of
                                        Int64
0 -> Bool
True
                                        Int64
_ -> Bool -> Bool
not (Char -> Bool
isSpace (Char -> Bool) -> Char -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Char
T.head Text
s)


      unquoteFrom :: T.Text -> T.Text
      unquoteFrom :: Text -> Text
unquoteFrom xs' :: Text
xs'@(Text -> Text -> Maybe Text
T.stripPrefix (String -> Text
T.pack String
">") -> Just Text
suf) = if (String -> Text
T.pack String
"From ") Text -> Text -> Bool
`T.isPrefixOf` (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') Text
suf
                                                                 then Text
suf
                                                                 else Text
xs'
      unquoteFrom Text
xs = Text
xs

sanHeader :: T.Text -> T.Text
sanHeader :: Text -> Text
sanHeader = Text -> Text -> Text -> Text
T.replace (String -> Text
T.pack String
"\n") (String -> Text
T.pack String
" ")

-- | Renders an MBox into Text
showMBox :: MBox -> T.Text
showMBox :: [Message] -> Text
showMBox = [Text] -> Text
T.concat ([Text] -> Text) -> ([Message] -> [Text]) -> [Message] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Message -> Text) -> [Message] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Message -> Text
showMessage

-- | Renders an individual message into Text.
showMessage :: Message -> T.Text
showMessage :: Message -> Text
showMessage (Message Text
f [Header]
hs Text
b) = [Text] -> Text
T.unlines ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ Text
f Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Header] -> [Text]
formatHeaders [Header]
hs [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ [(String -> Text
T.pack String
"\n")] [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
++ Text -> [Text]
formatBody Text
b
                               where
                                 formatHeaders :: [Header] -> [Text]
formatHeaders = (Header -> Text) -> [Header] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
x,Text
y) -> Text
x Text -> Text -> Text
`T.append` (String -> Text
T.pack String
": ") Text -> Text -> Text
`T.append` Text
y)
                                 formatBody :: Text -> [Text]
formatBody = (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
unFrom ([Text] -> [Text]) -> (Text -> [Text]) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
                                 unFrom :: Text -> Text
unFrom Text
x
                                     | Text -> Bool
isFrom Text
x = Char
'>' Char -> Text -> Text
`T.cons` Text
x
                                     | Bool
otherwise = Text
x
                                 isFrom :: Text -> Bool
isFrom Text
x = (String -> Text
T.pack String
"From ") Text -> Text -> Bool
`T.isPrefixOf` (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') Text
x

-- | Return True if header is a Message-ID header.
isID :: Header -> Bool
isID :: Header -> Bool
isID (Text
x, Text
_) = Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== String -> Text
T.pack String
"Message-ID"

-- | Return True if header is a Date header.
isDate :: Header -> Bool
isDate :: Header -> Bool
isDate (Text
x, Text
_) = Text
x Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== String -> Text
T.pack String
"Date"

-- | Return the values of headers for which predicate is True
getHeader :: (Header -> Bool) -> Message -> [T.Text]
getHeader :: (Header -> Bool) -> Message -> [Text]
getHeader Header -> Bool
predFunc = (Header -> Text) -> [Header] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Header -> Text
forall a b. (a, b) -> b
snd ([Header] -> [Text]) -> (Message -> [Header]) -> Message -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Header -> Bool) -> [Header] -> [Header]
forall a. (a -> Bool) -> [a] -> [a]
filter Header -> Bool
predFunc ([Header] -> [Header])
-> (Message -> [Header]) -> Message -> [Header]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Message -> [Header]
headers