# List the contents of the data/ directory to see which input files are available.
!ls data/
Crassostrea_gigas.GCA_000297895.1.29.pep.all.fa
# Preview the start of the peptide FASTA file (head prints the first 10 lines by
# default) to check the record layout: a '>' header line followed by sequence lines.
!head data/Crassostrea_gigas.GCA_000297895.1.29.pep.all.fa
>EKC21896 pep:novel supercontig:GCA_000297895.1:scaffold39040:587:10567:-1 gene:CGI_10003144 transcript:EKC21896 description:"C-type lectin domain family 10 member A " MEHRVMTQSCDGDWVRYGDSCYRYYTSQMAWIYAFKTCQSDNGFLTDIENADEQAFLQNL TSRAKFWISASDSVGDWFKWMWYGGIHPWGYTNWDTAFYLTEDPVLTLAVEASSNETRLI CAFIFNISDVDFSVEWYLNDKMKCVVTPCMPGNCSMRGSSRESNVISAKIQVW >EKC21897 pep:novel supercontig:GCA_000297895.1:scaffold39040:38002:64552:-1 gene:CGI_10003145 transcript:EKC21897 description:"WD repeat-containing protein C10orf79 " MDAVGSLELSWAQGYNGGKVGYIDKDVICYQAGSNIKFIAEDGAETVFNFKGNGVGPFAV HATNKCFAVAERCLNPKITVYVYPTFREAAVLKDGAKLEYRSLVFSHSEYMVTITGIPEF QLMLWRYTDGTKLTSVDITSDPVSSVTFNPGNWRQLCVTTEKSMTVWNTEQSNDKYVMLP QKIKLPAENPSLNSDEEKDRDIPTRASTRMTRYTIDLPKAAIAGLVGERAEALDEVQDTT PRVVPLSHTWSPSGDVYVGCQGGQILKVDGEIYKAKLFYHPLPPASAPNSRATSATSRFN
# Count the sequence records in the FASTA file: one record per header line.
# The pattern is anchored with '^' so only lines that START with '>' are counted
# (a '>' appearing elsewhere on a line is not a record header), and `grep -c`
# reports the count directly instead of piping matched lines through `wc -l`.
!grep -c "^>" data/Crassostrea_gigas.GCA_000297895.1.29.pep.all.fa
26089
# Extract every FASTA header (title) line into analyses/pep.title.
# `mkdir -p` makes sure the output directory exists, because the shell redirect
# fails if analyses/ is missing; it is a no-op when the directory already exists.
# The pattern is anchored with '^' so only lines that START with '>' are kept.
!mkdir -p analyses && grep "^>" data/Crassostrea_gigas.GCA_000297895.1.29.pep.all.fa > analyses/pep.title
# Preview the first lines of analyses/pep.title (head prints 10 lines by default)
# to confirm the file contains only '>' header lines.
!head analyses/pep.title
>EKC21896 pep:novel supercontig:GCA_000297895.1:scaffold39040:587:10567:-1 gene:CGI_10003144 transcript:EKC21896 description:"C-type lectin domain family 10 member A " >EKC21897 pep:novel supercontig:GCA_000297895.1:scaffold39040:38002:64552:-1 gene:CGI_10003145 transcript:EKC21897 description:"WD repeat-containing protein C10orf79 " >EKC38298 pep:novel supercontig:GCA_000297895.1:scaffold348:113:793:1 gene:CGI_10022967 transcript:EKC38298 description:"Amidase " >EKC38299 pep:novel supercontig:GCA_000297895.1:scaffold348:3467:4042:1 gene:CGI_10022968 transcript:EKC38299 description:"Ankyrin repeat domain-containing protein 1 " >EKC38300 pep:novel supercontig:GCA_000297895.1:scaffold348:4307:4717:1 gene:CGI_10022969 transcript:EKC38300 >EKC38301 pep:novel supercontig:GCA_000297895.1:scaffold348:46971:56603:1 gene:CGI_10022970 transcript:EKC38301 >EKC38302 pep:novel supercontig:GCA_000297895.1:scaffold348:129346:131334:1 gene:CGI_10022971 transcript:EKC38302 description:"IMPACT-like protein " >EKC38303 pep:novel supercontig:GCA_000297895.1:scaffold348:160739:182763:1 gene:CGI_10022972 transcript:EKC38303 description:"Putative ATP-dependent RNA helicase DDX58 " >EKC38304 pep:novel supercontig:GCA_000297895.1:scaffold348:191643:203885:1 gene:CGI_10022973 transcript:EKC38304 description:"Interferon-induced helicase C domain-containing protein 1 " >EKC38305 pep:novel supercontig:GCA_000297895.1:scaffold348:204032:218386:1 gene:CGI_10022974 transcript:EKC38305 description:"Collagen-like protein 2 "