Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains Proceedings Article
In: Egele, Manuel; Moonsamy, Veelasha; Gruss, Daniel; Carminati, Michele (Ed.): Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, pp. 86–92, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-97623-0.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
% Poster paper in the proceedings of the 22nd DIMVA conference (2025).
% Names are stored in "Last, First" form; the page range uses the BibTeX double hyphen.
% NOTE(review): series is assumed to be LNCS based on the Springer volume number 15748 -- confirm.
@inproceedings{PelayoBenedetRG-DIMVA-25,
  title      = {Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  author     = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  editor     = {Egele, Manuel and Moonsamy, Veelasha and Gruss, Daniel and Carminati, Michele},
  booktitle  = {Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment},
  series     = {Lecture Notes in Computer Science},
  volume     = {15748},
  pages      = {86--92},
  publisher  = {Springer Nature Switzerland},
  address    = {Cham},
  year       = {2025},
  date       = {2025-01-01},
  isbn       = {978-3-031-97623-0},
  doi        = {10.1007/978-3-031-97623-0_5},
  url        = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-DIMVA-25.pdf},
  abstract   = {Domain generation algorithms enable resilient malware communication by generating pseudo-random domain names. While traditional detection relies on task-specific algorithms, the use of Large Language Models (LLMs) to identify Algorithmically Generated Domains (AGDs) remains largely unexplored. This work evaluates nine LLMs from four major vendors in a zero-shot environment, without fine-tuning. The results show that LLMs can distinguish AGDs from legitimate domains, but they often exhibit a bias, leading to high false positive rates and overconfident predictions. Adding linguistic features offers minimal accuracy gains while increasing complexity and errors. These findings highlight both the promise and limitations of LLMs for AGD detection, indicating the need for further research before practical implementation.},
  keywords   = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains Journal Article
In: Journal of Information Security and Applications, vol. 93, pp. 104176, 2025, ISSN: 2214-2134.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
% Journal article in Journal of Information Security and Applications, vol. 93 (2025).
% "pages" holds the article number. Percent signs in the abstract are escaped so that
% styles which typeset the abstract do not treat the rest of the line as a TeX comment.
@article{PelayoBenedetRG-JISA-25,
  title      = {The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  author     = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  journal    = {Journal of Information Security and Applications},
  volume     = {93},
  pages      = {104176},
  year       = {2025},
  date       = {2025-09-01},
  issn       = {2214-2134},
  doi        = {10.1016/j.jisa.2025.104176},
  url        = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-JISA-25.pdf},
  abstract   = {Algorithmically Generated Domains (AGDs) are integral to many modern malware campaigns, allowing adversaries to establish resilient command and control channels. While machine learning techniques are increasingly employed to detect AGDs, the potential of Large Language Models (LLMs) in this domain remains largely underexplored. In this paper, we examine the ability of nine commercial LLMs to identify malicious AGDs, without parameter tuning or domain-specific training. We evaluate zero-shot approaches and few-shot learning approaches, using minimal labeled examples and diverse datasets with multiple prompt strategies. Our results show that certain LLMs can achieve detection accuracy between 77.3\% and 89.3\%. In a 10-shot classification setting, the largest models excel at distinguishing between malware families, particularly those employing hash-based generation schemes, underscoring the promise of LLMs for advanced threat detection. However, significant limitations arise when these models encounter real-world DNS traffic. Performance degradation on benign but structurally suspect domains highlights the risk of false positives in operational environments. This shortcoming has real-world consequences for security practitioners, given the need to avoid erroneous domain blocking that disrupt legitimate services. Our findings underscore the practicality of LLM-driven AGD detection, while emphasizing key areas where future research is needed (such as more robust warning design and model refinement) to ensure reliability in production environments.},
  keywords   = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate   = {published},
  tppubtype  = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.
A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis Journal Article
In: Data in Brief, vol. PP, no. PP, pp. PP, 2025, ISSN: 2352-3409, (Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries
% Data in Brief article that, per its note, is accepted but not yet in print.
% The placeholder volume/number/pages values ("PP") are omitted so styles do not print them;
% add the real values once assigned. pubstate now agrees with the note.
@article{HuiciR-DIB-25b,
  title      = {A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis},
  author     = {Huici, Daniel and Rodríguez, Ricardo J.},
  journal    = {Data in Brief},
  year       = {2025},
  date       = {2025-01-01},
  issn       = {2352-3409},
  url        = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciR-DIB-25.pdf},
  abstract   = {Similarity digest algorithms, such as TLSH, ssdeep, or sdhash, to name a few, generate intermediate representations (i.e., digests) of digital artifacts to efficiently identify similar objects and measure their degree of similarity. This dataset provides the results of a static analysis performed on system binary files extracted from multiple versions of the Windows operating system, accompanied by their similarity digests. An automated static analysis process was applied to all extracted binaries to decompose them into individual functions and capture detailed metadata for each of them. Specifically, similarity hashes (in particular, TLSH, ssdeep, and LZJD) were computed to enable forensic analysts to effectively assess artifact similarities. The dataset serves as an “allow list” of legitimate Windows artifacts, allowing forensic analysts to detect deviations from trusted binaries, verify system integrity, perform software audits, and improve malware detection efforts. This paper describes the structure of the dataset, the methodology and tools used in its creation, and its value for forensic analysis and cybersecurity investigation.},
  note       = {Accepted for publication. To appear.},
  keywords   = {Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries},
  pubstate   = {forthcoming},
  tppubtype  = {article}
}