peter-2024.bib

@comment{{This file has been generated by bib2bib 1.99}}
@comment{{Command line: bib2bib -c 'year=2024 and not annote:"skip_html" and not annote:"unpublished"' -ob peter-2024.bib -oc peter-2024.cite peter.bib}}
% AJCAI 2024 conference paper on LLM-assisted systematic literature reviews.
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case.
% NOTE(review): url is a relative file name, presumably resolved by the web page generator - confirm.
@inproceedings{McGinness:etal:Highlighting:AJCAI:2024,
  author    = {McGinness, Lachlan and Baumgartner, Peter and Onyango, Esther and Lema, Zelalem},
  editor    = {Gong, Mingming and Song, Yiliao and Koh, Yun Sing and Xiang, Wei and Wang, Derui},
  title     = {Highlighting Case Studies in {LLM} Literature Review of Interdisciplinary System Science},
  booktitle = {AI 2024: Advances in Artificial Intelligence},
  year      = {2024},
  publisher = {Springer Nature Singapore},
  address   = {Singapore},
  pages     = {29--43},
  url       = {AJCAI-highlighting.pdf},
  doi       = {10.1007/978-981-96-0348-0_3},
  abstract  = {Large Language Models (LLMs) were used to assist four Commonwealth Scientific and Industrial Research Organisation (CSIRO) researchers to perform systematic literature reviews (SLR). We evaluate the performance of LLMs for SLR tasks in these case studies. In each, we explore the impact of changing parameters on the accuracy of LLM responses. The LLM was tasked with extracting evidence from chosen academic papers to answer specific research questions. We evaluate the models' performance in faithfully reproducing quotes from the literature and subject experts were asked to assess the model performance in answering the research questions. We developed a semantic text highlighting tool to facilitate expert review of LLM responses.},
  isbn      = {978-981-96-0348-0}
}
% TPLP journal article introducing CON-FOLD, an extension of FOLD-RM with confidence scores.
% Only the algorithm name is brace-protected in the title; the rest is left to the style's casing.
% The author surname uses the same capitalisation as the other entries in this file.
@article{McGinness:Baumgartner:CON-FOLD:TPLP:2024,
  author   = {McGinness, Lachlan and Baumgartner, Peter},
  title    = {{CON-FOLD} Explainable Machine Learning with Confidence},
  journal  = {Theory and Practice of Logic Programming},
  year     = {2024},
  pages    = {1--19},
  issn     = {1471-0684, 1475-3081},
  doi      = {10.1017/S1471068424000346},
  urldate  = {2024-10-31},
  abstract = {FOLD-RM is an explainable machine learning classification algorithm that uses training data to create a set of classification rules. In this paper, we introduce CON-FOLD which extends FOLD-RM in several ways. CON-FOLD assigns probability-based confidence scores to rules learned for a classification task. This allows users to know how confident they should be in a prediction made by the model. We present a confidence-based pruning algorithm that uses the unique structure of FOLD-RM rules to efficiently prune rules and prevent overfitting. Furthermore, CON-FOLD enables the user to provide preexisting knowledge in the form of logic program rules that are either (fixed) background knowledge or (modifiable) initial rule candidates. The paper describes our method in detail and reports on practical experiments. We demonstrate the performance of the algorithm on benchmark datasets from the UCI Machine Learning Repository. For that, we introduce a new metric, Inverse Brier Score, to evaluate the accuracy of the produced confidence scores. Finally, we apply this extension to a real-world example that requires explainability: marking of student responses to a short answer question from the Australian Physics Olympiad.},
  langid   = {english},
  file     = {/Users/bau050/Zotero/storage/XUZC36YE/Mcginness and Baumgartner - 2024 - CON-FOLD Explainable Machine Learning with Confide.pdf}
}
% arXiv preprint (identifier 2407.20244, class cs.CL) evaluating LLM reasoning.
% The acronym in the title is brace-protected; author names use the unambiguous "Last, First" form.
@misc{McGinness:Baumgartner:LLM-reasoning-evaluation:Arxiv:2024,
  author        = {McGinness, Lachlan and Baumgartner, Peter},
  title         = {Steamroller Problems: An Evaluation of {LLM} Reasoning Capability with Automated Theorem Prover Strategies},
  year          = {2024},
  eprint        = {2407.20244},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.20244}
}
% LPAR 2024 conference paper, published in the EPiC Series in Computing, volume 100.
% The o-slash in the first editor's surname is written as a braced LaTeX accent command,
% replacing a mangled export that typeset literal braces and a backslash.
@inproceedings{McGinness:Baumgartner:ATP-help-LLM:LPAR:2024,
  author    = {McGinness, Lachlan and Baumgartner, Peter},
  title     = {Automated Theorem Provers Help Improve Large Language Model Reasoning},
  booktitle = {Proceedings of 25th Conference on Logic for Programming, Artificial Intelligence and Reasoning},
  editor    = {Bj{\o}rner, Nikolaj and Heule, Marijn and Voronkov, Andrei},
  series    = {EPiC Series in Computing},
  volume    = {100},
  pages     = {51--69},
  year      = {2024},
  publisher = {EasyChair},
  bibsource = {EasyChair, https://easychair.org},
  issn      = {2398-7340},
  url       = {https://easychair.org/publications/paper/vzpW},
  doi       = {10.29007/2n9m}
}