Title: | Retrieve and summarize bioRxiv and medRxiv preprints with a local LLM using ollama |
---|---|
Description: | Retrieve and summarize bioRxiv and medRxiv preprints with a local LLM using ollama. |
Authors: | Stephen Turner [aut, cre] |
Maintainer: | Stephen Turner <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.2.1 |
Built: | 2024-10-27 06:05:44 UTC |
Source: | https://github.com/stephenturner/biorecap |
Add prompt to a data frame of preprints
add_prompt(preprints, ...)
add_prompt(preprints, ...)
preprints |
Result from `get_preprints()`. |
... |
Additional arguments to `build_prompt_preprint()` (e.g., `nsentences`, `instructions`). |
A data frame of preprints with a prompt added.
preprints <- get_preprints(subject=c("bioinformatics", "genomics")) preprints <- add_prompt(preprints) preprints
preprints <- get_preprints(subject=c("bioinformatics", "genomics")) preprints <- add_prompt(preprints) preprints
Add prompts for an entire subject
add_prompt_subject(preprints, ...)
add_prompt_subject(preprints, ...)
preprints |
Output from `get_preprints()` followed by `add_prompt()` and `add_summary()`, grouped by subject. |
... |
Additional arguments to `build_prompt_subject()` (e.g., `nsentences`, `instructions`). |
A tibble with a subject and prompt column.
subjects <- example_preprints |> dplyr::group_by(subject) |> add_prompt_subject() subjects
subjects <- example_preprints |> dplyr::group_by(subject) |> add_prompt_subject() subjects
Generate a summary from a data frame of prompts
add_summary(preprints, model = "llama3.2", host = NULL)
add_summary(preprints, model = "llama3.2", host = NULL)
preprints |
Output from `get_preprints()` followed by `add_prompt()`. |
model |
A model available to Ollama (run `ollama list` in a terminal to see the models installed locally). |
host |
The base URL to use. Default is `NULL`, which uses Ollama's default base URL. |
A tibble, with a response column added.
## Not run: # Individual papers preprints <- get_preprints(c("genomics", "bioinformatics")) |> add_prompt() |> add_summary() preprints ## End(Not run)
## Not run: # Individual papers preprints <- get_preprints(c("genomics", "bioinformatics")) |> add_prompt() |> add_summary() preprints ## End(Not run)
Create a report from bioRxiv/medRxiv preprints
biorecap_report( output_dir = ".", subject = NULL, nsentences = 2L, model = "llama3.2", host = NULL, use_example_preprints = FALSE, ... )
biorecap_report( output_dir = ".", subject = NULL, nsentences = 2L, model = "llama3.2", host = NULL, use_example_preprints = FALSE, ... )
output_dir |
Directory to save the report. |
subject |
Character vector of subjects to include in the report. |
nsentences |
Number of sentences to summarize each paper in. |
model |
The model to use for generating summaries. See the `model` argument of `add_summary()`. |
host |
The base URL to use. Default is `NULL`, which uses Ollama's default base URL. |
use_example_preprints |
Use the example preprints data included with the package instead of fetching new data from bioRxiv/medRxiv. For diagnostic/testing purposes only. |
... |
Other arguments passed to |
Nothing; called for its side effects to produce a report.
## Not run: output_dir <- tempdir() biorecap_report(use_example_preprints=TRUE, output_dir=output_dir) biorecap_report(subject=c("bioinformatics", "genomics", "synthetic_biology"), output_dir=output_dir) ## End(Not run)
## Not run: output_dir <- tempdir() biorecap_report(use_example_preprints=TRUE, output_dir=output_dir) biorecap_report(subject=c("bioinformatics", "genomics", "synthetic_biology"), output_dir=output_dir) ## End(Not run)
Construct a prompt to summarize a paper
build_prompt_preprint( title, abstract, nsentences = 2L, instructions = c("I am giving you a paper's title and abstract.", "Summarize the paper in as many sentences as I instruct.", "Do not include any preamble text to the summary", "just give me the summary with no preface or intro sentence.") )
build_prompt_preprint( title, abstract, nsentences = 2L, instructions = c("I am giving you a paper's title and abstract.", "Summarize the paper in as many sentences as I instruct.", "Do not include any preamble text to the summary", "just give me the summary with no preface or intro sentence.") )
title |
The title of the paper. |
abstract |
The abstract of the paper. |
nsentences |
The number of sentences to summarize the paper in. |
instructions |
Instructions to the prompt. This can be a character vector that gets collapsed into a single string. |
A string containing the prompt.
build_prompt_preprint(title="A great paper", abstract="This is the abstract.")
build_prompt_preprint(title="A great paper", abstract="This is the abstract.")
Construct a prompt to summarize a set of papers from a subject
build_prompt_subject( subject, title, summary, nsentences = 5L, instructions = c("I am giving you information about recent bioRxiv/medRxiv preprints.", "I'll give you the subject, preprint titles, and short summary of each paper.", "Please provide a general summary new advances in this subject/field in general.", "Provide this summary of the field in as many sentences as I instruct.", "Do not include any preamble text to the summary", "just give me the summary with no preface or intro sentence.") )
build_prompt_subject( subject, title, summary, nsentences = 5L, instructions = c("I am giving you information about recent bioRxiv/medRxiv preprints.", "I'll give you the subject, preprint titles, and short summary of each paper.", "Please provide a general summary new advances in this subject/field in general.", "Provide this summary of the field in as many sentences as I instruct.", "Do not include any preamble text to the summary", "just give me the summary with no preface or intro sentence.") )
subject |
The name of the subject. |
title |
A character vector of titles in the subject |
summary |
A character vector of the summaries of the papers, as provided by `add_summary()`. |
nsentences |
The number of sentences to summarize the subject in. |
instructions |
Instructions to the prompt. This can be a character vector that gets collapsed into a single string. |
A string containing the prompt.
title <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(title) summary <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(summary) build_prompt_subject(subject="bioinformatics", title=title, summary=summary)
title <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(title) summary <- example_preprints |> dplyr::filter(subject=="bioinformatics") |> dplyr::pull(summary) build_prompt_subject(subject="bioinformatics", title=title, summary=summary)
Example preprints with summaries from August 6, 2024.
example_preprints
example_preprints
A tibble returned from `get_preprints()` followed by `add_prompt()` followed by `add_summary()`.
example_preprints
example_preprints
Get bioRxiv/medRxiv preprints
get_preprints(subject = "all", clean = TRUE)
get_preprints(subject = "all", clean = TRUE)
subject |
A character vector of valid bioRxiv and/or medRxiv subjects. See subjects. |
clean |
Logical; try to strip out graphical abstract information? If TRUE, this strips away any text between |
A data frame of preprints from bioRxiv and/or medRxiv.
preprints <- get_preprints(subject=c("bioinformatics", "Public_and_Global_Health")) preprints
preprints <- get_preprints(subject=c("bioinformatics", "Public_and_Global_Health")) preprints
Safely query bioRxiv/medRxiv RSS feeds
safely_query_rss(subject, server = c("biorxiv", "medrxiv"))
safely_query_rss(subject, server = c("biorxiv", "medrxiv"))
subject |
A character vector of valid bioRxiv and/or medRxiv subjects. See subjects. |
server |
A character vector of either "biorxiv" or "medrxiv". |
A data frame of preprints from bioRxiv and/or medRxiv.
Names of subjects with RSS feeds in bioRxiv
subjects
subjects
A list of character vectors of subjects, one for bioRxiv, one for medRxiv.
https://www.biorxiv.org/alertsrss
subjects
subjects
Create a markdown table from preprint summaries
tt_preprints(preprints, cols = c("title", "summary"), width = c(1, 3))
tt_preprints(preprints, cols = c("title", "summary"), width = c(1, 3))
preprints |
Output from |
cols |
Columns to display in the resulting markdown table. |
width |
Vector of relative widths, equal in length to `cols`. |
A tinytable table.
# Use built-in example data example_preprints tt_preprints(example_preprints[1:2,])
# Use built-in example data example_preprints tt_preprints(example_preprints[1:2,])