#!/usr/bin/env icarus
# BCCM_GENECORNER Plasmid Collection - Syntax file
#
# Version 2018.08.23.01
# Created August 23, 2018
# Derived from bccm_lmbp.is
# Author: Paolo Romano
# (c) CABRI 1999-2023
# $fn: field-name table.  Each line maps a flat-file field tag (left of the
# colon) to the short field code (right of the colon).  The `fields`
# production looks a matched tag up here ($fn.$Ct) when it writes a field,
# and the indexing / HTML productions then select fields by these codes
# (c:coll_no, c:name, ...).
$fn={
Collection_number:coll_no
Name:name
Type:type
Literature:lit
Related_literature:rlt
History_of_deposit:history
Restricted_distribution:rest_distrib
Host_for_distribution:host_for_dist
Selectable_phenotype:sel_pheno
Replicon:replicon
Host_range:host_range
Promoter:promoter
Cloned_gene:cloned_gene
Medium:medium
Other_culture_collection_numbers:oth_cul_col_no
Class:class
Ribosome_binding_site:ribo_bind_site
Further_information:further_inf
Sequence_detail:seq_det
Terminator:termi
External_links:ext_link
}
$rules={
# entry: cuts the input stream (file:text) into one token per record.
# Shape of the rule as written:
#   1. consume leading lines that do not start with 'Collection_number';
#   2. a line starting with 'Collection_number' begins the record ($Wrt);
#      $entryFip is assigned from $Fip at that point;
#   3. each following line that does not start with 'Collection_number'
#      is added to the same record ($App).
# NOTE(review): $Skip:0 presumably disables blank skipping so that lines are
# matched verbatim, and $Fip is presumably the current file position --
# confirm both against the Icarus reference.
entry: ~ {$In:[file:text] $Out pre $Skip:0}
('Collection_number' {$Not} ln)*
('Collection_number' {$entryFip=$Fip $Wrt} ln {$App}
('Collection_number' {$Not} ln {$App})*)? ~
# fields
# fields: splits one `entry` token into field tokens.
# Two alternatives are repeated until the entry is exhausted:
#   - a whole line beginning with '/', written as a token of its own;
#   - a field tag, written using its $fn lookup ($fn.$Ct), followed by one
#     or more line remainders that do not begin with '/' or an uppercase
#     letter; these are appended ($App) to the field, which is how
#     continuation lines stay attached to their tag.
# NOTE(review): $Skip:1 presumably re-enables blank skipping here -- confirm.
fields: ~ {$In:entry $Out $Skip:1}
(/\/[^\n]+\n/ {$Wrt} |
tag {$Wrt:$fn.$Ct} (/[\/A-Z]/ {$Not} ln {$App})+)+ ~
#indexing
# coll_no: index rule for the Collection_number field.
# After the tag and a single space it accepts exactly one item: either a
# word3 token, which is written ($Wrt), or a run of punctuation, which is
# matched but not written.  There is no repetition, so only the first item
# after the tag is considered.
coll_no: ~ {$In:[fields c:coll_no] $Out}
tag ' ' ( word3 {$Wrt} | punct ) ~
# name: index rule for the Name field.
# After the tag, the remainder of the line is written with the characters
# '/', '|', '.', ',' and '-' removed by $StrRep (replacement string is empty).
#
# The class used to read [\/|\.|-|,].  Inside a bracket expression the bars
# intended as "or" are literal, and "|-|" forms a one-character range from
# '|' to '|', so the hyphen itself was never matched and never stripped.
# The hyphen is now the last class member, where it is always literal.
# '|' stays in the class so its existing removal is unchanged.
# NOTE(review): assumes r: takes a regular expression -- confirm; if it were
# a plain character list this class would still cover the same characters.
name: ~ {$In:[fields c:name] $Out}
tag ln {$Wrt:[s:$StrRep:[s:$Ct r:"[\/|\.,-]" to:""]]} ~
# medium: index rule for the Medium field.
# Everything after the tag up to the end of the line is taken as one
# token and passed to $Uniq with $Itc; no splitting into words is done.
medium: ~ {$In:[fields c:medium] $Out}
tag /[^\n]+/{$Uniq:$Itc} ~
# oth_cul_col_no: index rule for Other_culture_collection_numbers.
# After the tag the value is either the literal ' -' (nothing is written)
# or a list of `code` tokens separated by '; ', each of which is written.
# The ' -' alternative is listed first; `code` itself allows blanks and
# hyphens, so it could otherwise match that text too.
oth_cul_col_no: ~ {$In:[fields c:oth_cul_col_no] $Out}
tag ( ' -' | code {$Wrt} ( '; ' code {$Wrt} )* ) ~
# seq_det: index rule for the Sequence_detail field.
# After the tag the text is consumed as an alternation of alphanumeric runs
# (seq_str), which are indexed through $Uniq, and runs of any other
# characters (seq_punct), which are skipped.
#
# The $Uniq argument is now $Itc, as in every other $Uniq call in this file.
# It was previously the bare word Itc, which lacks the variable sigil and
# would presumably have indexed the literal text "Itc" instead of the token.
seq_det: ~ {$In:[fields c:seq_det] $Out}
tag (seq_str {$Uniq:$Itc} | seq_punct )+ ~
# rest_sites: ~ {$In:[fields c:rest_sites] $Out}
# tag (httag | rsit_str {$Uniq:Itc} | rsit_punct | '-')+ ~
# lit: index rule for the Literature field.
# After the 'Literature' tag (bib_tag) the citation text is scanned as any
# mix of alphabetic words (indexed via $Uniq:$Itc), volume/page numbers
# (bib_vol), single punctuation characters (bib_punct) and blanks; only the
# words are indexed.  An optional trailing '[PMID: nnn]' contributes the
# PubMed id as a further indexed token.
lit: ~ {$In:[fields c:lit] $Out}
bib_tag
(bib_word {$Uniq:$Itc} | bib_vol | bib_punct | ' ')*
('[PMID: ' pmid {$Uniq:$Itc} ']')? ~
# rlt: index rule for the Related_literature field.
# Same shape as the `lit` rule but anchored on the 'Related_literature' tag
# (relbib_tag): alphabetic words are indexed via $Uniq:$Itc, numbers,
# punctuation and blanks are skipped, and an optional trailing
# '[PMID: nnn]' contributes the PubMed id.
rlt: ~ {$In:[fields c:rlt] $Out}
relbib_tag
(bib_word {$Uniq:$Itc} | bib_vol | bib_punct | ' ')*
('[PMID: ' pmid {$Uniq:$Itc} ']')? ~
# ext_link: ~ {$In:[fields c:ext_link] $Out}
# tag ' '?
# 'map' ' '? (('Not available'|ext_links{$Wrt})
# (', ' embl {$Wrt})?) ~
#
# ext_link: index rule for the External_links field.
# After the tag and an optional blank:
#   - zero or more 'map' items, each either the text 'Not available'
#     (not written) or a file name matching ext_links (written);
#   - optionally ', EMBL ' followed by one or more accession numbers
#     (emblno), each written, separated by a single space.
# NOTE(review): the HTML rule h_ext_link separates multiple EMBL numbers
# with ';' whereas this rule uses ' ' -- check which separator the data
# actually uses.
ext_link: ~ {$In:[fields c:ext_link] $Out}
tag ' '?
( ( 'map' ' '? ('Not available'|ext_links{$Wrt}) )*
(', EMBL ' emblno {$Wrt} ( ' ' emblno {$Wrt})* )?) ~
# strX: shared free-text index rule for the fields listed in c:{...}
# (type, history, sel_pheno, replicon, host_range, promoter, cloned_gene,
# rest_distrib, host_for_dist, termi, class, ribo_bind_site, further_inf).
# After the tag the value is scanned as any sequence of: a lone '-',
# an HTML-style tag (httag), a `str` token (indexed via $Uniq:$Itc), any
# single character, or blanks.  Only `str` tokens are indexed; the /./
# alternative lets the scan step over every other character.
strX: ~ {$In:[fields c:{type history sel_pheno replicon
host_range promoter cloned_gene
rest_distrib
host_for_dist termi class
ribo_bind_site further_inf}] $Out}
tag ('-' | httag | str{$Uniq:$Itc} | /./ | space)* ~
#HTML Stuff
# h_top: HTML formatting of the Collection_number field (t:html).
# The literal tag is replaced by markup that closes the current row, emits
# a two-column header row reading "CABRI:" followed by $entry.libName, and
# then opens a row whose first cell holds the tag ($Ct) and whose second
# cell is left open.  The rest of the field is then re-emitted followed by
# the closing </TD></TR>.
# Unlike the other h_* rules, the tag replacement here carries no
# if:$isTable==0 condition.
# NOTE(review): "if:$ParInt:isTable $Fail" presumably makes the production
# fail when the isTable parameter is set (table view) -- confirm.
h_top: ~ {$In:[fields c:coll_no t:html] pre if:$ParInt:isTable $Fail}
'Collection_number' {$Rep:
|</TR><TR VALIGN=TOP>
|<TD colspan=2 bgcolor=\"#ffffff\">
|<font color=\"#000066\">
|<B>CABRI:($entry.libName)</B></TD>
|</TR>
|<TR VALIGN=TOP><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/.*/ {$Rep:"$Ct</TD></TR>"} ~
# h_seq_det: HTML formatting of the Sequence_detail field (t:html).
# When $isTable is 0 the tag is replaced by a row start plus a label cell
# containing the tag.  The value cell is not opened there: the second
# replacement wraps the remaining text in <TD ...><pre> ... </pre></TD></TR>.
# NOTE(review): "if:$ParInt:isTable $Fail" presumably skips this production
# in table view -- confirm.
h_seq_det: ~ {$In:[fields c:seq_det t:html] pre if:$ParInt:isTable $Fail}
'Sequence_detail' {if:$isTable==0
$Rep:
|<TR valign=top><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
}
/.*/ {$Rep:"<TD bgcolor=\"#ffffff\"><pre>$Ct</pre></TD></TR>"} ~
# h_medium: HTML formatting of the Medium field (t:html).
# When $isTable is 0 the tag is replaced by a row start, a label cell with
# the tag, and an opened value cell.  The rest of the line is replaced by
# a hyperlink built with $Hlink on the bccm_genecornerR link, using the
# token text ($Ct) for both parameters.
# NOTE(review): $rest (the token with ' ' replaced by '%20') is computed but
# not referenced anywhere in this production -- possibly it was meant to be
# one of the $Hlink parameters; confirm before removing or using it.
# NOTE(review): nothing in this production emits the closing </TD></TR>.
h_medium: ~ {$In:[fields c:medium t:html] pre if:$ParInt:isTable $Fail}
'Medium' {if:$isTable==0
$Rep:
|<TR valign=top><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/[^\n]+/ {$rest=$Ct.rep:[' ' to:'%20']
$Rep:$Hlink:[bccm_genecornerR p:{$Ct $Ct}]} ~
#
# /.*/ {$Rep:"$Ct</TD></TR>"} ~
# h_lit: HTML formatting of the Literature and Related_literature fields
# (c:{lit rlt}, t:html).
# When $isTable is 0 the leading word (the field tag) is replaced by a row
# start, a label cell with the tag, and an opened value cell.  The value is
# then scanned as stretches of text without '[' alternating with optional
# '[PMID: nnn]' groups; each PubMed id is replaced by a hyperlink built
# with $Hlink on the pubmedCabriR link.  The final element emits the
# closing </TD></TR>.
# NOTE(review): the final element refers to a production named `x`, which
# is not defined among the productions visible in this file -- confirm
# where it comes from.
h_lit: ~ {$In:[fields c:{lit rlt} t:html]
pre if:$ParInt:isTable $Fail}
word {if:$isTable==0 $Rep:
|<TR valign=top><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\">
|<b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/[^\\[]*/
('[PMID: ' pmid {$Rep:$Hlink:[pubmedCabriR p:{$Ct $Ct}]} ']')?
( /[^\\[]*/
('[PMID: ' pmid {$Rep:$Hlink:[pubmedCabriR p:{$Ct $Ct}]} ']')? )*
x{$Rep:"</TD></TR>"} ~
# h_ext_link: HTML formatting of the External_links field (t:html).
# When $isTable is 0 the tag is replaced by a row start, a label cell with
# the tag, and an opened value cell.  The value must then read 'map '
# followed by either a file name (ext_links), which becomes a hyperlink on
# the bccm_genecorner_extlR link, or the text 'Not available'.  An optional
# ', EMBL ' part follows, whose accession numbers each become a hyperlink
# on the bccm_genecorner_ENA link.
# NOTE(review): multiple EMBL numbers are separated by ';' here, while the
# indexing rule ext_link separates them with ' '; 'map ' is also mandatory
# and single here but optional and repeatable there -- check against data.
# NOTE(review): nothing in this production emits the closing </TD></TR>.
h_ext_link: ~ {$In:[fields c:ext_link t:html] pre if:$ParInt:isTable $Fail}
'External_links' {if:$isTable==0
$Rep:
|<TR valign=top><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\">
|<b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
' '* 'map ' ( ext_links {$Rep:$Hlink:[bccm_genecorner_extlR p:{$Ct $Ct}]}
| 'Not available' )
(', EMBL ' emblno {$Rep:$Hlink:[bccm_genecorner_ENA p:{$Ct $Ct}]} ( ';' emblno {$Rep:$Hlink:[bccm_genecorner_ENA p:{$Ct $Ct}]})* )? ~
# h_fields: default HTML formatting for every field that has no dedicated
# h_* rule (t:html).
# When $isTable is 0 the tag is replaced by a row start, a label cell with
# the tag, and an opened value cell; the rest of the field is re-emitted
# followed by the closing </TD></TR>.
#
# The xc:{...} list names the fields handled by dedicated rules (h_top,
# h_seq_det, h_medium, h_lit, h_ext_link) so they are not formatted twice.
# rlt is now part of that list: h_lit takes c:{lit rlt}, but only lit was
# excluded here, so Related_literature was picked up by both productions.
# NOTE(review): "if:$ParInt:isTable $Fail" presumably skips this production
# in table view -- confirm.
h_fields: ~ {$In:[fields xc:{coll_no seq_det medium lit rlt ext_link} t:html] pre if:$ParInt:isTable $Fail}
tag {if:$isTable==0
$Rep:
|<TR valign=top><TD bgcolor=\"#ffffff\">
|<font color=\"#000066\"><b><i>$Ct</i></b></font></TD>
|<TD bgcolor=\"#ffffff\">
}
/.*/ {$Rep:"$Ct</TD></TR>"} ~
# t_fields: runs over every field (no c:/xc: filter); after the tag, the
# remaining text matched by /.*/ is written with $Wrt:$Itc.
# NOTE(review): the "t_" prefix suggests this feeds the table view --
# confirm against the databank definition that uses this syntax file.
t_fields: ~ {$In:[fields] $Out} tag /.*/ {$Wrt:$Itc} ~
# definitions
# Lexical building blocks shared by the productions above.
# NOTE(review): several classes below write escapes with a doubled
# backslash (\\( \\. \\%) while others use a single one (\n \/ \.);
# presumably one level is consumed by Icarus before the regex is compiled
# -- confirm which form is required.

# tag: a field name -- an uppercase letter followed by letters/underscores.
tag: ~ /[A-Z][a-zA-Z_]+/ ~
# httag: an HTML-style tag, from '<' to the next '>'.
httag: ~ /<[^>]+>/ ~
# ln: the rest of the current line including its newline (may be empty).
ln: ~ /[^\n]*\n/ ~
# number: a run of digits and dots.
number: ~ /[0-9.]+/ ~
# code: uppercase letters, digits, hyphens and blanks; because the blank is
# in the class a code can contain, start or end with spaces.
code: ~ /[ A-Z0-9-]+/ ~
# word: alphanumerics plus '_', '/' and '-'.
word: ~ /[0-9a-zA-Z_\/-]+/ ~
# word3: like word but also allowing blanks and parentheses.
word3: ~ /[ 0-9a-zA-Z_\/\\(\\)-]+/ ~
# word4: alphanumerics plus '&', '%', '_' and '-'.
word4: ~ /[a-zA-Z0-9&\\%\\_-]+/ ~
# word5: two or more word4-like segments joined by dots.
word5: ~ /[a-zA-Z0-9&\\%\\_-]+(\\.[a-zA-Z0-9&\\%\\_-]+)+/ ~
# str: a word4 or a word5.
# NOTE(review): if alternation picks the first alternative that matches,
# word4 always matches the leading segment of a dotted word and word5 is
# never chosen -- confirm whether the order should be reversed.
str: ~ word4 | word5 ~
# rsit_str: ~ /[a-zA-Z][a-zA-Z0-9-]+/ ~
# rsit_punct: ~ /[^a-zA-Z0-9<>-]+/ ~
# seq_str: a run of alphanumerics.
seq_str: ~ /[a-zA-Z0-9]+/ ~
# seq_punct: a run of anything that is not alphanumeric.
seq_punct: ~ /[^a-zA-Z0-9]+/ ~
# punct: a run of tab , ; : . ( ) + * characters.
punct: ~ /[\t,;:\\.\\(\\)\\+\\*]+/ ~
# space: a run of blanks and tabs.
space: ~ /[ \t]+/ ~
# bib_tag / relbib_tag: the literal tags of the two literature fields.
bib_tag: ~ ('Literature') ~
relbib_tag: ~ ('Related_literature') ~
# bib_word: a run of letters.
bib_word: ~ /[a-zA-Z]+/ ~
# bib_punct: exactly one blank, tab or one of ; : . , ( ) -
bib_punct: ~ /[ \t;:.,()-]/ ~
# bib_vol: one or more numbers, each optionally followed by one bib_punct.
bib_vol: ~ (number bib_punct?)+ ~
# pmid: a run of digits (PubMed id).
pmid: ~ /[0-9]+/ ~
# ext_links: a file name ending in a dot and three letters taken from
# [gjp][ipd][fg]; this covers gif, jpg and pdf, and also accepts other
# combinations of those letters.
ext_links: ~ /[A-Za-z0-9_-]+\.[gjp][ipd][fg]/ ~
# embl: the literal 'EMBL ' followed by an accession; in this file it is
# referenced only from the commented-out ext_link rule.
embl: ~ 'EMBL '/[A-Z0-9_]+/ ~
# emblno: an accession number -- uppercase letters, digits, underscores.
emblno: ~ /[A-Z0-9_]+/ ~
}