Monday, January 28, 2008

Haskell Snippet: read a CSV file, marshal into a type

This listing shows how to open a file, extract its contents, split them on the delimiter (the regex is a little off) and then marshal the fields into a datatype.


-- | One record of page information, as filled in by 'readInfoContentFile'
-- from the delimited line of a @.info@ file.
--
-- NOTE(review): every 'Integer' field is named after an HTML tag and is
-- presumably the number of occurrences of that tag in the page -- confirm
-- against whatever produces the @.info@ file.
data PageURLFieldInfo = PageURLFieldInfo
  { linkUrlField       :: String   -- ^ taken verbatim from column 0 (the URL)
  , aUrlField          :: Integer  -- ^ parsed from column 1
  , blockquoteUrlField :: Integer  -- ^ parsed from column 2
  , divUrlField        :: Integer  -- ^ parsed from column 3
  , h1UrlField         :: Integer  -- ^ parsed from column 4
  , imgUrlField        :: Integer  -- ^ parsed from column 5
  , pUrlField          :: Integer  -- ^ parsed from column 6
  , strongUrlField     :: Integer  -- ^ parsed from column 7
  , tableUrlField      :: Integer  -- ^ parsed from column 8
  }
-- | Load the @.info@ companion of an @.extract@ file.
--
-- The info content file contains HTML document information and the page URL
-- on one delimited line:
--
-- > URL::|a::|b::|blockquote::|div::|h1::|h2::|i::|img::|p::|span::|strong::|table
--
-- @extr_file@ is the path of the @.extract@ file.  Its last
-- @length ".extract"@ characters are dropped and @.info@ is appended to get
-- the path that is actually read; the suffix itself is not checked.
--
-- The result is 'defaultPageFieldInfo' when the file cannot be read, when it
-- has fewer than nine columns, or when one of the numeric columns does not
-- parse as an 'Integer'.  The file may legitimately be missing, so none of
-- these cases is treated as fatal.
readInfoContentFile :: String -> IO PageURLFieldInfo
readInfoContentFile extr_file = do
  -- 'readFile' is lazy: forcing the length inside the 'try' makes read errors
  -- surface here (instead of later, in pure code) and lets the handle close.
  -- NOTE(review): with the extensible-exceptions 'try' (base >= 4) the
  -- exception type behind @Left _@ is ambiguous and needs an annotation such
  -- as IOException -- confirm which base this file targets.
  csvtry <- try (readFile info_file >>= \txt -> length txt `seq` return txt)
  return $ case csvtry of
    Left _    -> defaultPageFieldInfo
    Right csv -> case parseInfo csv of
      Just info -> info
      Nothing   -> defaultPageFieldInfo
  where
    suffix_len = length ".extract"
    info_file  = take (length extr_file - suffix_len) extr_file ++ ".info"

    -- Split on the literal "::|" delimiter.  The previous pattern used the
    -- character class [::|]+, which also split on every single ':' and so
    -- broke URLs such as "http://...".
    --
    -- NOTE(review): columns 0..8 are used, exactly as before.  The format
    -- line above lists 13 columns (with b, h2, i and span in between), so if
    -- that line is accurate the fields after 'aUrlField' are reading the
    -- wrong columns -- confirm against a real .info file.
    parseInfo :: String -> Maybe PageURLFieldInfo
    parseInfo csv =
      case splitRegex (mkRegex "\\s*::[|]\\s*") csv of
        (url : a : bq : dv : h1 : img : p : strong : table : _) -> do
          aN      <- parseInteger a
          bqN     <- parseInteger bq
          dvN     <- parseInteger dv
          h1N     <- parseInteger h1
          imgN    <- parseInteger img
          pN      <- parseInteger p
          strongN <- parseInteger strong
          tableN  <- parseInteger table
          return PageURLFieldInfo
            { linkUrlField       = url
            , aUrlField          = aN
            , blockquoteUrlField = bqN
            , divUrlField        = dvN
            , h1UrlField         = h1N
            , imgUrlField        = imgN
            , pUrlField          = pN
            , strongUrlField     = strongN
            , tableUrlField      = tableN
            }
        _ -> Nothing

    -- Total replacement for 'read': accepts exactly what 'read' accepts
    -- (surrounding whitespace included, e.g. the trailing newline on the
    -- last column) but yields Nothing instead of throwing.
    parseInteger :: String -> Maybe Integer
    parseInteger s =
      case [n | (n, rest) <- reads s, ("", "") <- lex rest] of
        [n] -> Just n
        _   -> Nothing
-- End of File

No comments: