{-# LANGUAGE TypeSynonymInstances, TemplateHaskell, QuasiQuotes, MultiParamTypeClasses, FlexibleInstances, DeriveDataTypeable, ScopedTypeVariables #-}

module Examples.GeneAssociation where
import Language.Pads.Padsc
import Language.Forest.Forestc hiding (sources)
import System.IO.Unsafe (unsafePerformIO)
import Language.Pads.GenPretty
import Language.Forest.Graph
{-
config_file = "/Users/kfisher/Sites/cgi-bin/PLConfig.pm"
(config_rep, config_md) :: (Config_f, Config_f_md) = unsafePerformIO $ parseFile config_file
(head_rep, head_md) :: (Header_t, Header_t_md) = unsafePerformIO $ parseFile config_file
-}

{- Regular expression matching a run of one or more space or tab characters.
   Used as the whitespace separator in the PADS header descriptions below. -}
ws = RE "[ \t]+"
-- | Common file-name stem shared by the gzipped data files and the
-- configuration files (see 'get_gz_file' and 'get_conf_file').
title :: String
title = "gene_association"
-- | Name of the gzipped data file for source stem @f@:
-- 'title', a dot, the stem, then the @.gz@ suffix.
get_gz_file :: String -> String
get_gz_file f = concat [title, ".", f, ".gz"]
-- | Name of the README file for source stem @f@: the stem followed by
-- the @.README@ suffix.
get_readme_file :: String -> String
get_readme_file = (++ ".README")
-- | Name of the configuration file for source stem @f@:
-- 'title', a dot, the stem, then the @.conf@ suffix.
get_conf_file :: String -> String
get_conf_file f = concat [title, ".", f, ".conf"]

-- | Annotation sources.  Each entry pairs an institute name with the list of
-- organisms that institute provides; an empty list means the institute is
-- listed without any per-organism breakdown.
--
-- NOTE(review): the jcvi entry lists @"phaseolicola"@ directly after
-- @"Psyringae"@; it may have been meant as a single
-- @"Psyringae_phaseolicola"@ entry -- confirm against the actual file
-- listing before changing it.
sources :: [(String, [String])]
sources =
  [ ("Compugen", [])
  , ("GeneDB", ["Lmajor", "Pfalciparum", "Spombe", "Tbrucei", "tsetse"])
  , ("PAMGO", ["Atumefaciens", "Ddadantii", "Mgrisea", "Oomycetes"])
  , ("aspgd", [])
  , ("cgd", [])
  , ("dictyBase", [])
  , ("ecocyc", [])
  , ("fb", [])
  , ( "goa"
    , [ "arabidopsis", "chicken", "cow", "human", "mouse"
      , "pdb", "rat", "uniprot", "uniprot_noiea", "zebrafish"
      ]
    )
  , ("gramene", ["oryza"])
  , ( "jcvi"
    , [ "Aphagocytophilum", "Banthracis", "Cburnetii", "Chydrogenoformans"
      , "Cjejuni", "Cperfringens", "Cpsychrerythraea", "Dethenogenes"
      , "Echaffeensis", "Gsulfurreducens", "Hneptunium", "Lmonocytogenes"
      , "Mcapsulatus", "Nsennetsu", "Pfluorescens", "Psyringae"
      , "phaseolicola", "Soneidensis", "Spomeroyi", "Vcholerae"
      ]
    )
  , ("mgi", [])
  , ("pseudocap", [])
  , ("reactome", [])
  , ("rgd", [])
  , ("sgd", [])
  , ("sgn", [])
  , ("tair", [])
  , ("wb", [])
  , ("zfin", [])
  ]


-- | Flatten a list of (institute, organisms) pairs into file-name stems.
--
-- An institute with no organisms contributes just its own name; otherwise it
-- contributes one @institute_organism@ stem per organism, in list order.
comb_source :: [(String, [String])] -> [String]
comb_source = concatMap stems
  where
    -- Stems contributed by a single institute entry.
    stems (inst, [])     = [inst]
    stems (inst, organs) = [inst ++ "_" ++ organism | organism <- organs]

{- the GO files, when unzipped, contain a header like the following:
!CVS Version: Revision: 1.19 $
!GOC Validation Date: 01/27/2007 $
!Submission Date: 1/15/2007
-}

{- PADS description of a gene-association (GA) file.

   Building blocks:
     Pfloat   two Ints separated by '.'; the part after the dot is read as an
              Int, not as a fractional value.
     Pdate    mon/day/year, three Ints separated by '/'.
     Purl     the literal "http://" followed by a StringLn.

   Header line shapes (each is a literal prefix followed by its payload):
     Version_t, Valid_date_t, Sub_date_t, Project_name_t, URL_t, Email_t,
     Funding_t, Gaf_ver_t, Organism_t, Date_t, Note_t.
     Version_t and Valid_date_t additionally expect `ws` and a trailing '$'.
     Date_t's prefix is "date:" with no leading '!', unlike the others.

   Header_line_t is the union of those shapes; its last alternative, Other,
   accepts '!' followed by any StringLn.
   NOTE(review): alternatives are presumably tried in the order written, so
   Note (which needs `ws` after '!') and Other act as fall-backs -- confirm
   against the PADS documentation before reordering.

   GA_f is a list of Line Header_line_t followed by a list of
   Line Other_line_t (plain StringLn lines) with terminator EOF. -}
[pads|
  type Pfloat = (Int, '.', Int)
  data Pdate = Pdate {mon :: Int, '/', day :: Int, '/', year :: Int}
  type Purl = ("http://", StringLn)
  type Version_t =    	("!CVS Version: Revision: ", Pfloat, ws, '$')
  type Valid_date_t = 	("!GOC Validation Date: ", Pdate, ws, '$')
  type Sub_date_t =   	("!Submission Date: ", Pdate)
  type Project_name_t = ("!Project_name: ", StringLn)
  type URL_t =		("!URL: ", Purl)
  type Email_t =	("!Contact Email: ", StringLn)
  type Funding_t =	("!Funding: ", StringLn)
  type Gaf_ver_t =	("!gaf-version: ", Pfloat)
  type Organism_t =  	("!organism:", ws, StringLn)
  type Date_t =		("date:", ws, Pdate)
  type Note_t =	('!', ws, StringLn)
 
  data Header_line_t = 
  	  Version Version_t
	| Valid_date Valid_date_t
	| Sub_date Sub_date_t
	| Project_name Project_name_t
	| URL URL_t
	| Email Email_t
	| Funding Funding_t
	| Gaf_ver Gaf_ver_t
	| Organism Organism_t
	| Date Date_t
	| Note Note_t
	| Other ('!', StringLn)
  type Other_line_t = StringLn
 
  data GA_f = GA_f ([Line Header_line_t], [Line Other_line_t] terminator EOF)
|]

{- PADS description of a ".conf" file.

   Pair_t  one entry: `key` of type StringC '=', a literal '=', then `val`
           as a StringLn.
   Conf_f  a list of Line Pair_t with terminator EOF.

   NOTE(review): StringC '=' presumably reads characters up to, but not
   including, the next '=' -- confirm against the PADS documentation. -}
[pads|
  data Pair_t = Pair_t {key::StringC '=', '=', val::StringLn}
  data Conf_f = Conf_f ([Line Pair_t] terminator EOF)
|]

{- Shallow PADS description of an XML file.

   Xml_header  the literal "<?xml " followed by a StringLn.
   XML_f       one Line Xml_header followed by a list of Line StringLn; the
               lines after the header are kept as plain strings and are not
               parsed as XML. -}
[pads|
  type Xml_header = ("<?xml ", StringLn)
  data XML_f = XML_f (Line Xml_header, [Line StringLn])
|]

{- isReadOnly md = get_modes md == "-rw-r--r--" -}

{- Forest description of the gene-association file store.

   "Source stems" below are the strings produced by `comb_source sources`.

   Readme_d      one `Maybe TextFile` per source stem, named by
                 get_readme_file.
   PTHR_d name   a directory holding the "<name>.save.<ext>" files for
                 ext = attr, gaf, msa, paint, sfan, tree, txt, wts.  `paint`
                 is described by XML_f; the others are TextFiles.  Each field
                 uses the extension matching its own name.
   Pre_sub_d     per source stem, a `Maybe (Gzip (File GA_f))` named by
                 get_gz_file and a `Maybe (File Conf_f)` named by
                 get_conf_file.
   Paint_d       one PTHR_d per entry matching the regular expression
                 "PTHR[0-9]+" (the matched name is passed to PTHR_d), plus
                 the "pre-submission" entry described by Pre_sub_d.
   Submission_d  the same gz/conf collections as Pre_sub_d, a further
                 collection of `Maybe (File Conf_f)` for the paint
                 configuration files, and the "paint" entry described by
                 Paint_d.
   Top_d         the per-stem gz data files, plus the "readme" and
                 "submission" entries.

   NOTE(review): paint_files builds its names as "paint" ++ stem, with no
   separator between "paint" and the stem -- confirm this matches the real
   file names. -}
[forest|
  type Readme_d = Directory {
    readmes is [rm :: Maybe TextFile | rm <- <|map get_readme_file (comb_source sources)|>]
  }

  type PTHR_d (name :: String)  = Directory {
   attr is  <| name ++ ".save.attr" |>  :: TextFile,
   gaf  is  <| name ++ ".save.gaf" |>   :: TextFile,
   msa  is  <| name ++ ".save.msa" |>   :: TextFile,
   paint is <| name ++ ".save.paint" |> :: File XML_f,
   sfan is  <| name ++ ".save.sfan" |>  :: TextFile,
   tree is  <| name ++ ".save.tree" |>  :: TextFile,
   txt  is  <| name ++ ".save.txt" |>   :: TextFile,
   wts  is  <| name ++ ".save.wts" |>   :: TextFile
  }

  type Pre_sub_d = Directory {
    pre_gz_files   is [gz   :: Maybe (Gzip (File GA_f)) | gz   <- <|map get_gz_file   (comb_source sources)|>],
    pre_conf_files is [conf :: Maybe (File Conf_f)      | conf <- <|map get_conf_file (comb_source sources)|>]
  }

  type Paint_d = Directory {
    pthr_dirs is [dir_name :: PTHR_d (dir_name) | dir_name <- matches <| RE "PTHR[0-9]+" |> ],
    pre_sub   is "pre-submission" :: Pre_sub_d
  }

  type Submission_d = Directory {
    gz_files    is  [gz   :: Maybe (Gzip (File GA_f)) | gz   <- <|map get_gz_file   (comb_source sources)|>],
    conf_files  is  [conf :: Maybe (File Conf_f)      | conf <- <|map get_conf_file (comb_source sources)|>],
    paint_files is  [cs   :: Maybe (File Conf_f)      | cs   <- <|map (\x -> get_conf_file ("paint" ++ x)) (comb_source sources)|>],
    paint_d     is  "paint"               :: Paint_d
  }

  type Top_d = Directory {
    data_files is [gz :: Maybe (Gzip (File GA_f)) |
                        gz <- <|map get_gz_file (comb_source sources)|>],
    readme     is "readme"             :: Readme_d,
    sub        is "submission"         :: Submission_d
  }
|]

-- | Run @top_d_load@ on the directory @"Data/ga"@ and hand the metadata half
-- of its result to @mdToPDF@, with @"Examples/ga.pdf"@ as the second
-- argument.  The representation half of the result is not used.
doImg =
  top_d_load "Data/ga" >>= \(_rep, md) ->
    mdToPDF md "Examples/ga.pdf"