#!/usr/bin/env icarus
# BCCM_GENECORNER Plasmid Collection - Syntax file
#
# Version 2018.08.23.01
# Created August 23, 2018
# Derived from bccm_lmbp.is
# Author: Paolo Romano
# (c) CABRI 1999-2023

# Field-name map. Each line pairs a field tag as it appears in the data
# (left of the colon) with a short field identifier (right of the colon).
# The `fields` rule looks a matched tag up here ($fn.$Ct) to name the token
# it writes, and the indexing / HTML rules select tokens by these
# identifiers (for example c:coll_no, c:medium, c:ext_link).
$fn={
Collection_number:coll_no
Name:name
Type:type
Literature:lit
Related_literature:rlt
History_of_deposit:history
Restricted_distribution:rest_distrib
Host_for_distribution:host_for_dist
Selectable_phenotype:sel_pheno
Replicon:replicon
Host_range:host_range
Promoter:promoter
Cloned_gene:cloned_gene
Medium:medium
Other_culture_collection_numbers:oth_cul_col_no
Class:class
Ribosome_binding_site:ribo_bind_site
Further_information:further_inf
Sequence_detail:seq_det
Terminator:termi
External_links:ext_link
}

# Parsing rules. The first rule, `entry`, reads the `file:text` input and
# cuts it into entries:
#   1. lines that do not begin with 'Collection_number' are consumed;
#   2. a line beginning with 'Collection_number' opens an entry token
#      ($Wrt); the current $Fip value is kept in $entryFip at that point;
#   3. every following line that does not begin with 'Collection_number'
#      is appended ($App) to that entry token.
# The whole "open + append" group is optional, so input without any
# 'Collection_number' line yields no entry.
$rules={
  entry:	~ {$In:[file:text] $Out pre $Skip:0}
		  ('Collection_number' {$Not} ln)*
		  ('Collection_number' {$entryFip=$Fip $Wrt} ln {$App}
		  ('Collection_number' {$Not} ln {$App})*)?  ~

# fields
  # fields: splits one `entry` token into field tokens.
  #  - A line beginning with '/' is written as a token of its own.
  #  - Otherwise a `tag` opens a token whose name is taken from the $fn map
  #    ($fn.$Ct), and each following `ln` is appended to it as long as the
  #    text at that point does not start with '/' or an upper-case letter
  #    (the {$Not} guard), i.e. until the next tag or '/' line.
  fields:	~ {$In:entry $Out $Skip:1}
                  (/\/[^\n]+\n/ {$Wrt} |
                  tag {$Wrt:$fn.$Ct} (/[\/A-Z]/ {$Not} ln {$App})+)+ ~

#indexing
  # coll_no: indexing rule for the Collection_number field. After the tag
  # and one space it accepts either a `word3` run, which is written ($Wrt),
  # or a `punct` run, which is matched and not written.
  coll_no:	~ {$In:[fields c:coll_no] $Out}
		  tag ' ' ( word3 {$Wrt} | punct ) ~

  # name: indexing rule for the Name field. The rest of the line after the
  # tag is written after $StrRep has replaced every match of the pattern
  # "[\/|\.|-|,]" with the empty string.
  # NOTE(review): the '|' characters sit inside the bracket expression, so
  # they are presumably treated as literal members and '|' is stripped
  # together with '/', '.', '-' and ',' - confirm that this is intended.
  name:		~ {$In:[fields c:name] $Out}
		  tag ln {$Wrt:[s:$StrRep:[s:$Ct r:"[\/|\.|-|,]" to:""]]} ~
 
  # medium: indexing rule for the Medium field. Everything after the tag up
  # to (not including) the newline is matched as a single token and passed
  # to $Uniq with $Itc.
  medium:	~ {$In:[fields c:medium] $Out}
		  tag /[^\n]+/{$Uniq:$Itc} ~

  # oth_cul_col_no: indexing rule for Other_culture_collection_numbers.
  # After the tag the value is either the literal ' -' (nothing written) or
  # a list of `code` tokens separated by '; ', each of which is written.
  # Note that `code` itself accepts spaces, so a written value can contain
  # leading or embedded blanks.
  oth_cul_col_no: ~ {$In:[fields c:oth_cul_col_no] $Out}
                    tag ( ' -' | code {$Wrt} ( '; ' code {$Wrt} )* ) ~

  # seq_det: indexing rule for the Sequence_detail field. After the tag the
  # text is consumed as alternating alphanumeric runs (seq_str), each passed
  # to $Uniq, and non-alphanumeric runs (seq_punct), which are only matched.
  # The $Uniq argument is the variable $Itc, exactly as in the medium, lit,
  # rlt and strX rules; a bare `Itc` without the '$' is not a variable
  # reference and would hand the literal word to $Uniq instead of the
  # matched text.
  seq_det:	~ {$In:[fields c:seq_det] $Out}
		  tag (seq_str {$Uniq:$Itc} | seq_punct )+ ~
  
#  rest_sites:	~ {$In:[fields c:rest_sites] $Out}
#		  tag (httag | rsit_str {$Uniq:Itc} | rsit_punct | '-')+ ~

  # lit: indexing rule for the Literature field. After the literal tag
  # (bib_tag) the reference text is consumed piecewise: alphabetic words
  # (bib_word) are passed to $Uniq, while volume/page numbers (bib_vol),
  # punctuation (bib_punct) and blanks are matched without being indexed.
  # An optional trailing '[PMID: <digits>]' contributes the PubMed id
  # (pmid) to $Uniq as well.
  lit:		~ {$In:[fields c:lit] $Out}
		  bib_tag
                  (bib_word {$Uniq:$Itc} | bib_vol | bib_punct | ' ')*
                    ('[PMID: ' pmid {$Uniq:$Itc} ']')? ~

  # rlt: indexing rule for the Related_literature field. Same shape as the
  # `lit` rule but anchored on relbib_tag: alphabetic words go to $Uniq,
  # numbers / punctuation / blanks are only matched, and an optional
  # trailing '[PMID: <digits>]' adds the PubMed id to $Uniq.
  rlt:		~ {$In:[fields c:rlt] $Out}
		  relbib_tag
                  (bib_word {$Uniq:$Itc} | bib_vol | bib_punct | ' ')*
                    ('[PMID: ' pmid {$Uniq:$Itc} ']')? ~

# ext_link:	~ {$In:[fields c:ext_link] $Out}
#		  tag ' '? 
#		    'map' ' '? (('Not available'|ext_links{$Wrt})
#                                (', ' embl {$Wrt})?) ~
#
  # ext_link: indexing rule for the External_links field. After the tag and
  # an optional blank it accepts zero or more 'map' items, each followed by
  # either the literal 'Not available' (not written) or a file name matching
  # ext_links (written), and then an optional ', EMBL ' part whose
  # accession numbers (emblno) are written one by one.
  # NOTE(review): additional EMBL numbers are separated by a single blank
  # here, whereas the HTML rule h_ext_link expects ';' between them -
  # confirm which separator the data actually uses.
  ext_link:	~ {$In:[fields c:ext_link] $Out}
		  tag ' '? 
		     ( ( 'map' ' '? ('Not available'|ext_links{$Wrt}) )*
                       (', EMBL ' emblno {$Wrt} ( ' ' emblno {$Wrt})* )?) ~

  # strX: shared indexing rule for the free-text fields listed in the c:{}
  # selector (type, history, sel_pheno, replicon, host_range, promoter,
  # cloned_gene, rest_distrib, host_for_dist, termi, class, ribo_bind_site,
  # further_inf). After the tag the text is consumed as a sequence of:
  # '-' , an HTML-like tag (httag), a word (str, passed to $Uniq), any
  # single character (/./) or a run of blanks (space). Only `str` matches
  # are indexed; the other alternatives just advance through the text.
  strX:		~ {$In:[fields c:{type history sel_pheno replicon
                                  host_range promoter cloned_gene
                                  rest_distrib
                                  host_for_dist termi class
                                  ribo_bind_site further_inf}] $Out}
		  tag ('-' | httag | str{$Uniq:$Itc} | /./ | space)* ~

#HTML Stuff
 # h_top: HTML rule for the Collection_number field (t:html selection).
 # The literal 'Collection_number' is replaced ($Rep) by a banner row
 # spanning two columns that shows "CABRI:($entry.libName)", followed by
 # the opening of a normal row whose first cell holds the field label ($Ct)
 # and whose second cell is left open. The remainder of the line is then
 # re-emitted followed by "</TD></TR>", closing that cell and row.
 # NOTE(review): the header's `if:$ParInt:isTable $Fail` presumably turns
 # the rule off when the isTable parameter is set - confirm.
 h_top:      ~ {$In:[fields c:coll_no t:html] pre if:$ParInt:isTable $Fail}
               'Collection_number' {$Rep:
                     |</TR><TR VALIGN=TOP>
                     |<TD colspan=2 bgcolor=\"#ffffff\">
                     |<font color=\"#000066\">
                     |<B>CABRI:($entry.libName)</B></TD>
                     |</TR>
		     |<TR VALIGN=TOP><TD bgcolor=\"#ffffff\">
                     |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                     |<TD bgcolor=\"#ffffff\">
		    } 
		    /.*/ {$Rep:"$Ct</TD></TR>"}  ~

  # h_seq_det: HTML rule for the Sequence_detail field. When $isTable is 0
  # the literal tag is replaced by a row opening plus a label cell holding
  # the tag text ($Ct). The remaining text is wrapped in its own cell inside
  # <pre>...</pre> and the row is closed.
  h_seq_det: ~ {$In:[fields c:seq_det t:html] pre if:$ParInt:isTable $Fail} 
             'Sequence_detail' {if:$isTable==0 
	      $Rep:
                |<TR valign=top><TD bgcolor=\"#ffffff\">
                |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
              } 
             /.*/ {$Rep:"<TD bgcolor=\"#ffffff\"><pre>$Ct</pre></TD></TR>"} ~

  # h_medium: HTML rule for the Medium field. When $isTable is 0 the literal
  # tag is replaced by a row opening, a label cell holding the tag text and
  # the opening of the value cell. The rest of the line (up to the newline)
  # is replaced by a hyperlink built with $Hlink on bccm_genecornerR, using
  # the matched text for both parameters.
  # NOTE(review): $rest (the text with blanks turned into '%20') is assigned
  # but not referenced afterwards in this rule - it may have been meant as
  # a link parameter; confirm.
  # NOTE(review): unlike h_top / h_fields, nothing here emits the closing
  # "</TD></TR>" for the opened cell and row.
  h_medium: ~ {$In:[fields c:medium t:html] pre if:$ParInt:isTable $Fail} 
              'Medium' {if:$isTable==0 
                $Rep:
                  |<TR valign=top><TD bgcolor=\"#ffffff\">
                  |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                  |<TD bgcolor=\"#ffffff\">
                }
              /[^\n]+/ {$rest=$Ct.rep:[' ' to:'%20']
                        $Rep:$Hlink:[bccm_genecornerR p:{$Ct $Ct}]} ~
#
#	    /.*/ {$Rep:"$Ct</TD></TR>"}  ~

  # h_lit: HTML rule shared by the Literature and Related_literature fields.
  # The leading `word` (the field tag) is replaced, when $isTable is 0, by
  # a row opening, a label cell holding the tag text and the opening of the
  # value cell. The value is then walked as stretches of text containing no
  # '[' , each optionally followed by '[PMID: <digits>]'; every PubMed id is
  # replaced by a hyperlink built with $Hlink on pubmedCabriR. Finally the
  # token `x` is replaced by "</TD></TR>" to close the cell and row.
  # NOTE(review): `x` is not among the token definitions visible in this
  # file - confirm where it is defined.
  h_lit:	~ {$In:[fields c:{lit rlt} t:html]
                       pre if:$ParInt:isTable $Fail} 
             	  word {if:$isTable==0 $Rep:
                        |<TR valign=top><TD bgcolor=\"#ffffff\">
                        |<font color=\"#000066\">
                        |<b><i>$Ct</i></b></font></TD>
                        |<TD bgcolor=\"#ffffff\">
                       }
                 /[^\\[]*/ 
                  ('[PMID: ' pmid {$Rep:$Hlink:[pubmedCabriR p:{$Ct $Ct}]} ']')?
                 ( /[^\\[]*/ 
                  ('[PMID: ' pmid {$Rep:$Hlink:[pubmedCabriR p:{$Ct $Ct}]} ']')? )*
                  x{$Rep:"</TD></TR>"}  ~

  # h_ext_link: HTML rule for the External_links field. When $isTable is 0
  # the literal tag is replaced by a row opening, a label cell and the
  # opening of the value cell. After optional blanks and the literal 'map '
  # the value is either a file name (ext_links), replaced by a hyperlink on
  # bccm_genecorner_extlR, or the literal 'Not available'. An optional
  # ', EMBL ' part follows; each accession number (emblno) is replaced by a
  # hyperlink on bccm_genecorner_ENA.
  # NOTE(review): additional EMBL numbers are expected after ';' here,
  # while the indexing rule ext_link expects a single blank - confirm which
  # separator the data uses.
  # NOTE(review): nothing in this rule emits the closing "</TD></TR>".
  h_ext_link:   ~ {$In:[fields c:ext_link t:html] pre if:$ParInt:isTable $Fail}
		   'External_links' {if:$isTable==0
                        $Rep:
                        |<TR valign=top><TD bgcolor=\"#ffffff\">
                        |<font color=\"#000066\">
                        |<b><i>$Ct</i></b></font></TD>
                        |<TD bgcolor=\"#ffffff\">
                      }
		    ' '* 'map ' ( ext_links {$Rep:$Hlink:[bccm_genecorner_extlR p:{$Ct $Ct}]}
                            | 'Not available' )
                    (', EMBL ' emblno {$Rep:$Hlink:[bccm_genecorner_ENA p:{$Ct $Ct}]} ( ';' emblno {$Rep:$Hlink:[bccm_genecorner_ENA p:{$Ct $Ct}]})* )?   ~

  # h_fields: generic HTML rule for every field that has no dedicated h_*
  # rule. When $isTable is 0 the tag is replaced by a row opening, a label
  # cell holding the tag text and the opening of the value cell; the rest
  # of the field is re-emitted followed by "</TD></TR>".
  # The xc:{} list excludes the fields formatted elsewhere: coll_no (h_top),
  # seq_det (h_seq_det), medium (h_medium), lit and rlt (both handled by
  # h_lit) and ext_link (h_ext_link). rlt has to be listed here because
  # h_lit selects c:{lit rlt}; without it Related_literature would be
  # selected by h_lit and by this rule as well.
  h_fields:      ~ {$In:[fields xc:{coll_no seq_det medium lit rlt ext_link} t:html] pre if:$ParInt:isTable $Fail}
                     tag {if:$isTable==0
                     $Rep:
                       |<TR valign=top><TD bgcolor=\"#ffffff\">
                       |<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
                       |<TD bgcolor=\"#ffffff\">
                     }
                     /.*/ {$Rep:"$Ct</TD></TR>"}  ~

  # t_fields: applies to every field token; after the tag, the remaining
  # text is written with $Itc as the $Wrt argument.
  # NOTE(review): presumably the plain-text / table output of a field's
  # value - confirm against the library definition that calls it.
  t_fields:     ~ {$In:[fields] $Out} tag /.*/ {$Wrt:$Itc} ~

# definitions
  # Token definitions shared by the rules above.
  #   tag        field tag: an upper-case letter followed by letters or '_'
  #   httag      anything between '<' and '>' (an HTML-like tag)
  #   ln         the rest of the current line including its newline
  #   number     digits and dots
  #   code       upper-case letters, digits, '-' and blanks
  #   word*      progressively wider word classes; word5 is a word4-style
  #              word with one or more '.'-separated continuations
  #   str        word4 or word5
  #   seq_str / seq_punct   alphanumeric runs / everything else
  #   punct, space          punctuation runs / blank-or-tab runs
  #   bib_*      pieces of a literature reference (tag, word, punctuation,
  #              volume/page numbers)
  #   pmid       digits of a PubMed id
  #   ext_links  file name ending in '.' plus three letters drawn from
  #              [gjp][ipd][fg] (covers gif, jpg, pdf among others)
  #   embl       'EMBL ' followed by an accession; referenced only by the
  #              commented-out ext_link rule in this file
  #   emblno     a bare accession (upper-case letters, digits, '_')
  # NOTE(review): h_lit references a token `x` that is not defined here.
  tag:          ~ /[A-Z][a-zA-Z_]+/ ~
  httag:        ~ /<[^>]+>/ ~
  ln:		~ /[^\n]*\n/ ~
  number:       ~ /[0-9.]+/ ~
  code: 	~ /[ A-Z0-9-]+/ ~
  word:		~ /[0-9a-zA-Z_\/-]+/ ~
  word3:	~ /[ 0-9a-zA-Z_\/\\(\\)-]+/ ~
  word4:        ~ /[a-zA-Z0-9&\\%\\_-]+/ ~
  word5:        ~ /[a-zA-Z0-9&\\%\\_-]+(\\.[a-zA-Z0-9&\\%\\_-]+)+/ ~
  str:		~ word4 | word5 ~
#  rsit_str:	~ /[a-zA-Z][a-zA-Z0-9-]+/ ~
#  rsit_punct:	~ /[^a-zA-Z0-9<>-]+/ ~
  seq_str:	~ /[a-zA-Z0-9]+/ ~
  seq_punct:	~ /[^a-zA-Z0-9]+/ ~
  punct:	~ /[\t,;:\\.\\(\\)\\+\\*]+/ ~  
  space:	~ /[ \t]+/ ~  
  bib_tag:	~ ('Literature') ~
  relbib_tag:	~ ('Related_literature') ~
  bib_word:	~ /[a-zA-Z]+/ ~
  bib_punct:	~ /[ \t;:.,()-]/ ~
  bib_vol:	~ (number bib_punct?)+ ~
  pmid: 	~ /[0-9]+/ ~
  ext_links:	~ /[A-Za-z0-9_-]+\.[gjp][ipd][fg]/ ~
  embl:		~ 'EMBL '/[A-Z0-9_]+/ ~
  emblno:	~ /[A-Z0-9_]+/ ~
}