Skip to content

Instantly share code, notes, and snippets.

@magthe
Last active August 29, 2015 14:07
Show Gist options
  • Save magthe/744cdda3b59dc9abaa06 to your computer and use it in GitHub Desktop.
Save magthe/744cdda3b59dc9abaa06 to your computer and use it in GitHub Desktop.
Bye bye WordPress
#! /usr/bin/env runhaskell
-- {{{1 imports
import Control.Arrow
import Data.List
import Data.Maybe
import Data.Time.Format
import Data.Time.LocalTime
import System.Directory
import System.Environment
import System.FilePath
import System.IO
import System.Locale
import Text.HTML.TagSoup
import Text.PrettyPrint.ANSI.Leijen hiding ((</>))
-- {{{1 tagsoup parsing
getAllItems :: [Tag String] -> [[Tag String]]
getAllItems = partitions (~== TagOpen "item" [])
getElemText :: String -> [Tag String] -> String
getElemText n = fromTagText . (!! 1) . dropWhile (~/= TagOpen n [])
getPostTitle = getElemText "title"
getPostId = getElemText "wp:post_id"
getPostDate = getElemText "wp:post_date_gmt"
getPostName = getElemText "wp:post_name"
getPostContent = getElemText "content:encoded"
getPostTags :: [Tag String] -> [String]
getPostTags item = map (fromTagText . (!! 1)) tags
where
tags = partitions (~== TagOpen "category" [("domain", "post_tag")]) item
getPostComments :: [Tag String] -> [[Tag String]]
getPostComments = partitions (~== TagOpen "wp:comment" [])
getCommentId = getElemText "wp:comment_id"
getCommentAuthor = getElemText "wp:comment_author"
getCommentDate = getElemText "wp:comment_date_gmt"
getCommentContent = getElemText "wp:comment_content"
-- {{{1 types
data BlogPost = BlogPost
{ bpId :: String
, bpTitle :: String
, bpDate :: ZonedTime
, bpName :: String
, bpContent :: String
, bpTags :: [String]
} deriving (Show)
instance Pretty BlogPost where
pretty bp = vsep ls
where
ls = [ text "---"
, text "id: " <> text (bpId bp)
, text "title: " <> text (bpTitle bp)
, text "date: " <> text (formatTime defaultTimeLocale "%F" $ bpDate bp)
, text "name: " <> text (bpName bp)
, text "tags: " <> text (intercalate ", " $ bpTags bp)
, text "..."
, empty
, text (bpContent bp)
]
data BlogComment = BlogComment
{ bcId :: String
, bcAuthor :: String
, bcDate :: ZonedTime
, bcContent :: String
, bcBlogPost :: BlogPost
} deriving (Show)
instance Pretty BlogComment where
pretty bc = vsep ls
where
ls = [ text "---"
, text "id: " <> text (bcId bc)
, text "author: " <> text (bcAuthor bc)
, text "date: " <> text (formatTime defaultTimeLocale "%F" $ bcDate bc)
, text "..."
, empty
, text (bcContent bc)
]
parseSinglePost i = (post, comments)
where
post = parsePost i
comments = map (parseComment post) $ getPostComments i
parsePost ts = BlogPost
{ bpId = getPostId ts
, bpTitle = getPostTitle ts
, bpDate = fromJust $ parseTime defaultTimeLocale "%F %T" (getPostDate ts)
, bpName = getPostName ts
, bpContent = getPostContent ts
, bpTags = getPostTags ts
-- , bpComments = cs
}
parseComment p ts = BlogComment
{ bcId = getCommentId ts
, bcAuthor = getCommentAuthor ts
, bcDate = fromJust $ parseTime defaultTimeLocale "%F %T" (getCommentDate ts)
, bcContent = getCommentContent ts
, bcBlogPost = p
}
postFileName bp = "posts" </> postDate ++ "-" ++ (bpId bp) ++ "-" ++ (bpName bp) <.> "mkd"
where
postDate = formatTime defaultTimeLocale "%F" $ bpDate bp
commentFileName bc = "comments" </> (takeBaseName $ postFileName $ bcBlogPost bc) ++ "-c" ++ (bcId bc) <.> "mkd"
-- {{{1 IO functions
readExportFile :: FilePath -> IO ([BlogPost], [BlogComment])
readExportFile fn = do
xml <- readFile fn
let xmlTags = parseTags xml
items = getAllItems xmlTags
postsNComments = second concat (unzip $ map parseSinglePost items)
return postsNComments
writePostsNComments (posts, comments) = do
createDirectoryIfMissing False "posts"
createDirectoryIfMissing False "comments"
mapM_ writeOnePost posts
mapM_ writeOneComment comments
where
writeOnePost p = do
h <- openFile (postFileName p) WriteMode
hPutDoc h $ pretty p
hClose h
writeOneComment c = do
h <- openFile (commentFileName c) WriteMode
hPutDoc h $ pretty c
hClose h
-- {{{1 main
main :: IO ()
main = do
[fn] <- getArgs
readExportFile fn >>= writePostsNComments
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment