espial/src/Handler/Archive.hs
Jon Schoning 2e3e7097e6 init
2019-01-30 20:54:47 -06:00

107 lines
3.9 KiB
Haskell

module Handler.Archive where
import Import
import Data.Function ((&))
import qualified Data.Attoparsec.ByteString.Char8 as AP
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Char8 as BS8
import qualified Network.HTTP.Client as NH
import qualified Network.HTTP.Client.TLS as NH
import qualified Network.HTTP.Types.Status as NH
import qualified Web.FormUrlEncoded as WH
import qualified Control.Monad.Metrics as MM
shouldArchiveBookmark :: User -> Key Bookmark -> Handler Bool
shouldArchiveBookmark user kbid = do
runDB (get kbid) >>= \case
Nothing -> pure False
Just bm -> do
pure $
(isNothing $ bookmarkArchiveHref bm) &&
(bookmarkShared bm)
&& not (_isArchiveBlacklisted bm)
&& not (userPrivacyLock user)
&& userArchiveDefault user
archiveBookmarkUrl :: Key Bookmark -> String -> Handler ()
archiveBookmarkUrl kbid url =
(_fetchArchiveSubmitInfo >>= \case
Left e -> do
MM.increment "archive.fetchSubmitId_noparse"
$(logError) (pack e)
Right submitInfo -> do
userId <- requireAuthId
let req = _buildArchiveSubmitRequest submitInfo url
MM.increment "archive.submit"
res <- liftIO $ NH.httpLbs req =<< NH.getGlobalManager
let status = NH.responseStatus res
MM.increment ("archive.submit_status_" <> (pack.show) (NH.statusCode status))
let updateArchiveUrl = runDB . updateBookmarkArchiveUrl userId kbid . Just
headers = NH.responseHeaders res
case status of
s | s == NH.status200 ->
for_ (lookup "Refresh" headers >>= _parseRefreshHeaderUrl) updateArchiveUrl
s | s == NH.status302 ->
for_ (lookup "Location" headers) (updateArchiveUrl . decodeUtf8)
_ -> $(logError) (pack (show res)))
`catch` (\(e::SomeException) -> ($(logError) $ (pack.show) e) >> throwIO e)
_isArchiveBlacklisted :: Bookmark -> Bool
_isArchiveBlacklisted (Bookmark {..}) =
[ "hulu"
, "livestream"
, "netflix"
, "skillsmatter"
, "twitch.tv"
, "vimeo"
, "youtu.be"
, "youtube"
, "archive."
] &
any (`isInfixOf` bookmarkHref)
_parseRefreshHeaderUrl :: ByteString -> Maybe Text
_parseRefreshHeaderUrl h = do
let u = BS8.drop 1 $ BS8.dropWhile (/= '=') h
if (not (null u))
then Just $ decodeUtf8 u
else Nothing
_buildArchiveSubmitRequest :: (String, String) -> String -> NH.Request
_buildArchiveSubmitRequest (action, submitId) href =
NH.parseRequest_ ("POST " <> action) & \r ->
r { NH.requestHeaders =
[ ("User-Agent", _archiveUserAgent)
, ("Content-Type", "application/x-www-form-urlencoded")
]
, NH.requestBody = NH.RequestBodyLBS $ WH.urlEncodeAsForm ((
[ ("submitid" , submitId)
, ("url", href)
]) :: [(String, String)])
, NH.redirectCount = 0
}
_fetchArchiveSubmitInfo :: Handler (Either String (String , String))
_fetchArchiveSubmitInfo = do
MM.increment "archive.fetchSubmitId"
res <- liftIO $ NH.httpLbs buildSubmitRequest =<< NH.getGlobalManager
MM.increment ("archive.fetchSubmitId_status_" <> (pack.show) (NH.statusCode (NH.responseStatus res)))
let body = LBS.toStrict (responseBody res)
action = _parseSubstring (AP.string "action=\"") (AP.notChar '"') body
submitId = _parseSubstring (AP.string "submitid\" value=\"") (AP.notChar '"') body
pure $ (,) <$> action <*> submitId
where
buildSubmitRequest =
NH.parseRequest_ "https://archive.li/" & \r ->
r {NH.requestHeaders = [("User-Agent", _archiveUserAgent)]}
_archiveUserAgent :: ByteString
_archiveUserAgent = "espial"
_parseSubstring :: AP.Parser ByteString -> AP.Parser Char -> BS.ByteString -> Either String String
_parseSubstring start inner res = do
(flip AP.parseOnly) res (skipAnyTill start >> AP.many1 inner)
where
skipAnyTill end = go where go = end *> pure () <|> AP.anyChar *> go