Huici, Daniel; Rodríguez, Ricardo J.
A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis Journal Article
In: Data in Brief, vol. PP, no. PP, pp. PP, 2025, ISSN: 2352-3409, (Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries
@article{HuiciR-DIB-25b,
  author     = {Huici, Daniel and Rodríguez, Ricardo J.},
  title      = {A Dataset of {Windows} System Binaries and Similarity Digests for Enhanced Forensic Analysis},
  journal    = {Data in Brief},
  year       = {2025},
  date       = {2025-01-01},
  issn       = {2352-3409},
  url        = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciR-DIB-25.pdf},
  abstract   = {Similarity digest algorithms, such as TLSH, ssdeep, or sdhash, to name a few, generate intermediate representations (i.e., digests) of digital artifacts to efficiently identify similar objects and measure their degree of similarity. This dataset provides the results of a static analysis performed on system binary files extracted from multiple versions of the Windows operating system, accompanied by their similarity digests. An automated static analysis process was applied to all extracted binaries to decompose them into individual functions and capture detailed metadata for each of them. Specifically, similarity hashes (in particular, TLSH, ssdeep, and LZJD) were computed to enable forensic analysts to effectively assess artifact similarities. The dataset serves as an “allow list” of legitimate Windows artifacts, allowing forensic analysts to detect deviations from trusted binaries, verify system integrity, perform software audits, and improve malware detection efforts. This paper describes the structure of the dataset, the methodology and tools used in its creation, and its value for forensic analysis and cybersecurity investigation.},
  note       = {Accepted for publication. To appear.},
  keywords   = {Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries},
  pubstate   = {forthcoming},
  tppubtype  = {article}
}
Carrillo-Mondéjar, Javier; Suárez-Tangil, Guillermo; Costin, Andrei; Rodríguez, Ricardo J.
Exploring Shifting Patterns in Recent IoT Malware Proceedings Article
In: Proceedings of the 23rd European Conference on Cyber Warfare and Security (ECCWS), pp. 96–106, ACI, 2024.
Abstract | Links | BibTeX | Tags: Dynamic Analysis, Malware Evolution, Malware IoT, Malware lineage, Static Analysis
@inproceedings{CSCR-ECCWS-24b,
  author     = {Carrillo-Mondéjar, Javier and Suárez-Tangil, Guillermo and Costin, Andrei and Rodríguez, Ricardo J.},
  title      = {Exploring Shifting Patterns in Recent {IoT} Malware},
  booktitle  = {Proceedings of the 23rd European Conference on Cyber Warfare and Security (ECCWS)},
  volume     = {23},
  number     = {1},
  pages      = {96--106},
  publisher  = {ACI},
  year       = {2024},
  date       = {2024-07-01},
  doi        = {10.34190/eccws.23.1.2280},
  url        = {https://webdiis.unizar.es/~ricardo/files/papers/CSCR-ECCWS-24.pdf},
  abstract   = {The rise of malware targeting interconnected infrastructures has surged in recent years, driven largely by the widespread presence of vulnerable legacy IoT devices and inadequately secured networks. Despite the strong interest attackers have in targeting this infrastructure, a significant gap remains in understanding how the landscape has recently evolved. Addressing this knowledge gap is essential to thwarting the proliferation of massive botnets, thereby safeguarding end-users and preventing disruptions in critical infrastructures. This work offers a contemporary analysis of Linux-based malware, specifically tailored to IoT malware operating in 2021-2023. Using automated techniques involving both static and dynamic analysis, we classify malware into related threats. By scrutinizing the most recent dataset of Linux-based malware and comparing it to previous studies, we unveil distinctive insights into emerging trends, offering an unparalleled understanding of the evolving landscape. Although Mirai and Gafgyt remain the most prominent families and present a large number of variants, our results show that (i) there is an increase in the sophistication of malware, (ii) malware authors are adding new exploits to their arsenal, and (iii) malware families that originally attacked Windows systems have been adapted to attack Linux-based devices.},
  keywords   = {Dynamic Analysis, Malware Evolution, Malware IoT, Malware lineage, Static Analysis},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}