Skip to content

Instantly share code, notes, and snippets.

@Venomtek
Forked from erantapaa/process-hoogle.hs
Created July 16, 2024 03:07
Show Gist options
  • Save Venomtek/63bec606ea1b5dd694f293b10a256756 to your computer and use it in GitHub Desktop.
Save Venomtek/63bec606ea1b5dd694f293b10a256756 to your computer and use it in GitHub Desktop.
processing a .tar.gz file
{-# LANGUAGE OverloadedStrings #-}
-- build-depends: tar, bytestring, zlib
-- Example of how to iterate through a .tar.gz file
module Lib
where
import qualified Codec.Archive.Tar as Tar
import Codec.Archive.Tar
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Codec.Compression.GZip as GZip
import Control.Monad
import Data.Monoid
import System.Environment
import Data.List
someFunc :: IO ()
someFunc = putStrLn "someFunc"
test1 = do
contents <- fmap GZip.decompress $ LBS.readFile "hoogle.tar.gz"
let entries = Tar.read contents
files = selectFiles entries
forM_ files $ \entry-> do
let ePath = entryPath entry
NormalFile bytes size = entryContent entry
-- putStrLn $ show size ++ " " ++ ePath
processFile bytes
return ()
selectFiles :: Entries FormatError -> [Entry]
selectFiles Done = []
selectFiles (Fail e) = error $ "some format error: " ++ show e
selectFiles (Next entry next)
= case entryContent entry of
NormalFile _ _ -> entry : selectFiles next
_ -> selectFiles next
processFile bytes = do
let lns = LBS.lines bytes
forM_ lns processLine
processLine str
| LBS.isPrefixOf "@package " str = emit
| LBS.isPrefixOf "@version " str = emit
| LBS.isPrefixOf "module " str = emit
| otherwise = return ()
where emit = LBS.putStrLn str
----------
processTarArchive path = do
contents <- fmap GZip.decompress $ LBS.readFile path
let entries = Tar.read contents
files = selectFiles entries
forM_ files $ \entry-> do
let ePath = entryPath entry
NormalFile bytes size = entryContent entry
-- putStrLn $ show size ++ " " ++ ePath
processFile2 ("???", "???") (LBS.lines bytes)
return ()
processTxtFile path = do
contents <- LBS.readFile path
processFile2 ("???", "???") (LBS.lines contents)
processFile2 (package, version) [] = return ()
processFile2 (package, version) (a:as) =
case LBS.words a of
("@package" : pkg : _) -> processFile2 (pkg, version) as
("@version" : vers : _) -> processFile2 (package, vers) as
("module" : mod : _) -> do LBS.putStrLn $ mod <> " " <> package <> " " <> version
processFile2 (package, version) as
_ -> processFile2 (package, version) as
processArg path
| isSuffixOf ".tar.gz" path = processTarArchive path
| isSuffixOf ".txt" path = processTxtFile path
| otherwise = error $ "unknown file type: " ++ path
main = getArgs >>= mapM_ processArg
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment