Realizando uma sumarização automática de resumos de artigos científicos usando um modelo pré-treinado
from transformers import pipeline
# Build the summarization pipeline with an explicitly pinned checkpoint.
# Calling pipeline("summarization") with no model makes the library pick its
# current default and emit a "No model was supplied" warning; naming the
# checkpoint keeps results reproducible if that default ever changes.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 (https://huggingface.co/sshleifer/distilbart-cnn-12-6)
import pandas as pd
from IPython.display import display,clear_output, HTML
from ipywidgets import interact
# Load the article table from the local Parquet file. The code below reads
# its `title` and `abstract` columns.
df = pd.read_parquet('abstracts.parquet')
# Bare expression: in a notebook cell this renders a preview of the frame.
df
| | title | abstract |
---|---|---|
0 | Ultrasound imaging for identification of cereb... | Zika virus is a novel teratogenic agent associ... |
1 | USP38 Inhibits Zika Virus Infection by Removin... | Zika virus (ZIKV) is a mosquito-borne flavivir... |
2 | Purification of Dengue and Zika Virus Non-stru... | Dengue Virus (DENV) and ZIKA Virus (ZIKV) are ... |
3 | WhatsApp-Based Focus Groups Among Mexican-Orig... | Despite unprecedented advances in worldwide ac... |
4 | Cellular and molecular basis of IR3535 percept... | IR3535 is among the most widely used synthetic... |
... | ... | ... |
9870 | Zika virus: a report on three cases of human i... | |
9871 | Epidemiological notes on some viruses isolated... | |
9872 | Comparison by electron microscopy of the Ntaya... | |
9873 | Zika virus. II. Pathogenicity and physical p... | |
9874 | Zika virus. I. Isolations and serological sp... |
9875 rows × 2 columns
# The integer range handed to `interact` becomes a slider whose upper bound is
# inclusive, so the last valid positional index is len(df) - 1.
@interact(id=(0, len(df) - 1))
def sumariza(id=20):
    """Summarize the abstract stored at row ``id`` of ``df`` and render it.

    Displays the article title, the generated summary and the original
    abstract as a single HTML fragment in the notebook output area.

    Args:
        id: Positional row index into ``df`` (driven by the slider). The
            name shadows the ``id`` builtin but is kept because it is the
            widget's keyword/label.
    """
    # Local import: the stdlib escaping helper is not imported at file level.
    from html import escape

    row = df.iloc[id]
    title = row["title"]
    abstract = row["abstract"]
    # Some rows carry no abstract; treat anything that is not a string
    # (None, NaN) as empty instead of failing on .strip().
    text = abstract.strip() if isinstance(abstract, str) else ""

    if text:
        # The whitespace word count is used as the upper bound for the
        # summary length; clamp the lower bound so it never exceeds it
        # when the abstract is shorter than 30 words.
        max_length = len(text.split())
        min_length = min(30, max_length)
        result = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        summary = result[0]['summary_text']
    else:
        # Nothing to summarize: skip the model call entirely.
        summary = "No abstract available."

    # Escape all dataset/model text so characters such as "<" or "&"
    # (e.g. "p<0.05") are shown literally instead of being parsed as markup.
    display(HTML(
        f"<h2>{escape(str(title))}</h2><br>"
        f"<h3>Summary:</h3><p>{escape(summary)}</p>"
        f"<h3>Abstract</h3><p>{escape(text)}</p>"
    ))
interactive(children=(IntSlider(value=20, description='id', max=9875), Output()), _dom_classes=('widget-intera…