Uroz, Daniel; Díaz-Campo Pinilla, Abraham; Rodríguez, Ricardo J.
Structural Analysis of the Windows NT Heap for Memory Forensics Journal Article
In: Forensic Science International: Digital Investigation, 2026, ISSN: 2666-2817, (Selected Papers of the Thirteenth Annual DFRWS Europe Conference. Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Heap Forensics, Low Fragmentation Heap, memory forensics, Volatility 3, Windows NT Heap
@article{Uroz2026,
  title     = {Structural Analysis of the {Windows NT} Heap for Memory Forensics},
  author    = {Uroz, Daniel and Díaz-Campo Pinilla, Abraham and Rodríguez, Ricardo J.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/UrozDR-FSIDI-26.pdf},
  issn      = {2666-2817},
  year      = {2026},
  date      = {2026-03-01},
  journal   = {Forensic Science International: Digital Investigation},
  abstract  = {Modern attacks increasingly target user-space memory, leveraging dynamic heap allocations to store payloads, obfuscate runtime behavior, and evade traditional detection mechanisms. These heap-based techniques complicate memory forensics, as existing tools typically treat dynamic memory as a flat, unstructured region. To address this gap, in this paper we present a forensic methodology for the extraction and structural analysis of Windows NT heap entries, implemented in an open-source plugin for the Volatility 3 framework, called \texttt{HeapList}. Our approach supports all major Windows versions, from Vista to Windows 11, on both x86 and x64 architectures. We reconstruct the backend and frontend heap layers, decode encoded metadata, and enable navigation and directed extraction of heap entries. We validate our methodology through cross-verification with \texttt{WinDbg} and controlled testing using the Windows Heap API. Additionally, we discuss how our plugin can facilitate reverse engineering, the identification of dynamic payloads, heap layout inspection, and memory triage. By providing structured access to user-space heap memory, our work improves forensic visibility into dynamic memory and enables deeper analysis of heap-centric behavior in modern threat landscapes. Finally, we demonstrate the applicability of our approach in real-world scenarios by extracting information relevant to forensic analysis of user-space applications (specifically, from Telegram Desktop) through heap analysis.},
  note      = {Selected Papers of the Thirteenth Annual DFRWS Europe Conference. Accepted for publication. To appear.},
  keywords  = {Heap Forensics, Low Fragmentation Heap, memory forensics, Volatility 3, Windows NT Heap},
  pubstate  = {forthcoming},
  tppubtype = {article}
}
Lastanao Miró, Daniel; Carrillo, Javier; Rodríguez, Ricardo J.
Characterizing Tactics, Techniques, and Procedures in the macOS Threat Landscape Journal Article
In: Computers & Security, vol. 162, pp. 104806, 2026, ISSN: 0167-4048.
Abstract | Links | BibTeX | Tags: macOS malware, Malware behavior, MITRE ATT&CK framework, Static and dynamic analysis
@article{Miro2026,
  title     = {Characterizing Tactics, Techniques, and Procedures in the {macOS} Threat Landscape},
  author    = {Lastanao Miró, Daniel and Carrillo, Javier and Rodríguez, Ricardo J.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/LastanaoCR-COSE-26.pdf},
  doi       = {10.1016/j.cose.2025.104806},
  issn      = {0167-4048},
  year      = {2026},
  date      = {2026-03-01},
  journal   = {Computers \& Security},
  volume    = {162},
  pages     = {104806},
  abstract  = {As macOS systems increasingly become malware targets, understanding the tactics, techniques, and procedures (TTPs) used by adversaries is essential to improving defense strategies. This paper provides a systematic and detailed analysis of macOS malware using the MITRE ATT\&CK framework, focusing on TTPs at key stages of the malware attack cycle. Leveraging a comprehensive dataset of 57,636 macOS malware samples collected between November 2006 and October 2024, we employ both static and dynamic analysis techniques to uncover patterns in adversary behavior. Our analysis, primarily based on static analysis techniques, offers a broad representation of macOS malware and highlights common characteristics across samples. While we only partially explore dynamic behaviors, we identify recurring patterns that align with specific TTPs in the MITRE ATT\&CK framework, such as persistence and defense evasion. This mapping contributes to a more structured understanding of macOS threats and can help inform future detection and mitigation efforts.},
  keywords  = {macOS malware, Malware behavior, MITRE ATT&CK framework, Static and dynamic analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Carrillo-Mondéjar, Javier; Rodríguez, Ricardo J.
Identifying Runtime Libraries in Statically Linked Linux Binaries Journal Article
In: Future Generation Computer Systems, vol. 164, pp. 107602, 2025, ISSN: 0167-739X.
Abstract | Links | BibTeX | Tags: Binary code analysis, IoT, malware, Runtime library identification, Statically linked binaries
@article{CarrilloR-FGCS-25,
  title     = {Identifying Runtime Libraries in Statically Linked {Linux} Binaries},
  author    = {Carrillo-Mondéjar, Javier and Rodríguez, Ricardo J.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/CarrilloR-FGCS-25.pdf},
  doi       = {10.1016/j.future.2024.107602},
  issn      = {0167-739X},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Future Generation Computer Systems},
  volume    = {164},
  pages     = {107602},
  abstract  = {Vulnerabilities in unpatched applications can originate from third-party dependencies in statically linked applications, as they must be relinked each time to take advantage of libraries that have been updated to fix any vulnerability. Despite this, malware binaries are often statically linked to ensure they run on target platforms and to complicate malware analysis. In this sense, identification of libraries in malware analysis becomes crucial to help filter out those library functions and focus on malware function analysis. In this paper, we introduce \texttt{MANTILLA}, a system for identifying runtime libraries in statically linked Linux-based binaries. Our system is based on radare2 to identify functions and extract their features (independent of the underlying architecture of the binary) through static binary analysis and on the K-nearest neighbors supervised machine learning model and a majority rule to predict final values. \texttt{MANTILLA} is evaluated on a dataset consisting of binaries built for different architectures (\texttt{MIPSeb}, \texttt{ARMel}, \texttt{Intel x86}, and \texttt{Intel x86-64}) and different runtime libraries (\texttt{uClibc}, \texttt{glibc}, and \texttt{musl}), achieving very high accuracy. We also evaluate it in two case studies. First, using a dataset of binary files belonging to the \texttt{binutils} collection and second, using an IoT malware dataset. In both cases, good accuracy results are obtained both in terms of runtime library detection ($94.4\%$ and $95.5\%$, respectively) and architecture identification ($100\%$ and $98.6\%$, respectively).},
  keywords  = {Binary code analysis, IoT, malware, Runtime library identification, Statically linked binaries},
  pubstate  = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.; Mena, Eduardo
APOTHEOSIS: An efficient approximate similarity search system Journal Article
In: SoftwareX, vol. 29, pp. 102016, 2025, ISSN: 2352-7110.
Abstract | Links | BibTeX | Tags: Approximate K-nearest neighbors, Approximate matching, Approximate search methods, Data similarity analysis, similarity digest algorithms
@article{HuiciRM-SoftX-25,
  title     = {{APOTHEOSIS}: An efficient approximate similarity search system},
  author    = {Huici, Daniel and Rodríguez, Ricardo J. and Mena, Eduardo},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciRM-SoftX-25.pdf},
  doi       = {10.1016/j.softx.2024.102016},
  issn      = {2352-7110},
  year      = {2025},
  date      = {2025-02-01},
  urldate   = {2025-02-01},
  journal   = {SoftwareX},
  volume    = {29},
  pages     = {102016},
  abstract  = {APOTHEOSIS is a tool for efficiently identifying and comparing data similarity in large datasets, addressing challenges faced by traditional methods such as scalability and speed. APOTHEOSIS overcomes them by combining advanced algorithms and data structures, enabling fast and accurate similarity analysis. Specifically, it uses a custom hierarchical small navigation world as an approximate $K$-nearest neighbors search method, and approximate similarity digests algorithms to find common features between similar data items, also supporting various distance metrics beyond vector-based approaches. Our software tool is designed for seamless integration into research workflows, improving reproducibility and facilitating the comparison of large-scale, high-dimensional data comparison across multiple domains.},
  keywords  = {Approximate K-nearest neighbors, Approximate matching, Approximate search methods, Data similarity analysis, similarity digest algorithms},
  pubstate  = {published},
  tppubtype = {article}
}
Raducu, Razvan; Villagrasa-Labrador, Alain; Rodríguez, Ricardo J.; Álvarez, Pedro
MALVADA: A Framework for Generating Datasets of Malware Execution Traces Journal Article
In: SoftwareX, vol. 30, pp. 102082, 2025, ISSN: 2352-7110.
Abstract | Links | BibTeX | Tags: Dataset generation, Execution traces, Malware behavior, Malware classification
@article{RaducuVRA-SoftwareX-25,
  title     = {{MALVADA}: A Framework for Generating Datasets of Malware Execution Traces},
  author    = {Raducu, Razvan and Villagrasa-Labrador, Alain and Rodríguez, Ricardo J. and Álvarez, Pedro},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/RaducuVRA-SoftwareX-25.pdf},
  doi       = {10.1016/j.softx.2025.102082},
  issn      = {2352-7110},
  year      = {2025},
  date      = {2025-05-01},
  journal   = {SoftwareX},
  volume    = {30},
  pages     = {102082},
  abstract  = {Malware attacks have been growing steadily in recent years, making more sophisticated detection methods necessary. These approaches typically rely on analyzing the behavior of malicious applications, for example by examining execution traces that capture their runtime behavior. However, many existing execution trace datasets are simplified, often resulting in the omission of relevant contextual information, which is essential to capture the full scope of a malware sample’s behavior. This paper introduces MALVADA, a flexible framework designed to generate extensive datasets of execution traces from Windows malware. These traces provide detailed insights into program behaviors and help malware analysts to classify a malware sample. MALVADA facilitates the creation of large datasets with minimal user effort, as demonstrated by the WinMET dataset, which includes execution traces from approximately 10,000 Windows malware samples.},
  keywords  = {Dataset generation, Execution traces, Malware behavior, Malware classification},
  pubstate  = {published},
  tppubtype = {article}
}
Santos Filho, Ailton; Rodríguez, Ricardo J.; Feitosa, Eduardo L.
Automated broken object-level authorization attack detection in REST APIs through OpenAPI to colored petri nets transformation Journal Article
In: International Journal of Information Security, vol. 24, no. 2, pp. 83, 2025, ISSN: 1615-5270.
Abstract | Links | BibTeX | Tags: Broken access control, Colored Petri nets, OpenAPI, RESTful web services, Security analysis, vulnerabilities, Web application security
@article{SantosFilhoRF-IJIS-25,
  title     = {Automated broken object-level authorization attack detection in {REST} {APIs} through {OpenAPI} to colored petri nets transformation},
  author    = {Santos Filho, Ailton and Rodríguez, Ricardo J. and Feitosa, Eduardo L.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/SantosFilhoRF-IJIS-25.pdf},
  doi       = {10.1007/s10207-024-00970-5},
  issn      = {1615-5270},
  year      = {2025},
  date      = {2025-02-01},
  journal   = {International Journal of Information Security},
  volume    = {24},
  number    = {2},
  pages     = {83},
  abstract  = {The representational state transfer architectural style (REST) specifies a set of rules for creating web services. In REST, data and functionality are considered resources, accessed, and manipulated using a uniform, well-defined set of rules. RESTful web services are web services that follow the REST architectural style and are exposed to the Internet using RESTful APIs. Most of them are described by OpenAPI, a standard language-independent interface for RESTful APIs. RESTful APIs are continuously available on the Internet and are therefore a common target for cyberattacks. To prevent vulnerabilities and reduce risks in web systems, there are several security guidelines available, such as those provided by the Open Web Application Security Project (OWASP) foundation. A common vulnerability in web services is broken object level authorization (BOLA), which allows an attacker to modify or delete data or perform actions intended only for authorized users. For example, an attacker can change an order status, delete a user account, or add unauthorized data to the server. In this paper, we propose a transformation from OpenAPI to Petri nets, which enables formal modeling and analysis of REST APIs using existing Petri net analysis techniques to detect potential security risks directly from the analysis of web server logs. In addition, we also provide a tool, named Links2CPN, which automatically performs model transformation (taking the OpenAPI specification as input) and BOLA attack detection by analyzing web server execution traces. We apply it to a case study of a vulnerable web application to demonstrate its applicability. Our results show that it is capable of detecting BOLA attacks with an accuracy greater than 95\% in the proposed scenarios.},
  keywords  = {Broken access control, Colored Petri nets, OpenAPI, RESTful web services, Security analysis, vulnerabilities, Web application security},
  pubstate  = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.; Mena, Eduardo
An Extensible and Scalable System for Hash Lookup and Approximate Similarity Search with Similarity Digest Algorithms Journal Article
In: Forensic Science International: Digital Investigation, vol. 53, pp. 301930, 2025, ISSN: 2666-2817, (DFRWS USA 2025 - Selected Papers from the 25th Annual Digital Forensics Research Conference USA).
Abstract | Links | BibTeX | Tags: Approximate matching, hash lookup, similarity digest algorithms, Similarity hashing, similarity search
@article{HuiciRM-FSIDI-25,
  title     = {An Extensible and Scalable System for Hash Lookup and Approximate Similarity Search with Similarity Digest Algorithms},
  author    = {Huici, Daniel and Rodríguez, Ricardo J. and Mena, Eduardo},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciRM-FSIDI-25.pdf},
  doi       = {10.1016/j.fsidi.2025.301930},
  issn      = {2666-2817},
  year      = {2025},
  date      = {2025-07-01},
  journal   = {Forensic Science International: Digital Investigation},
  volume    = {53},
  pages     = {301930},
  abstract  = {Efficient management and analysis of large volumes of digital data has emerged as a major challenge in the field of digital forensics. To quickly identify and analyze relevant artifacts within large datasets, we introduce \texttt{Apotheosis}, an approximate similarity search system designed for scalability and efficiency. Our system integrates approximate search techniques (which allow searching for a match on a close value) with Similarity Digest Algorithms (SDA; which capture common features between similar elements), using a space-saving radix tree and a graph-based hierarchical navigable small world structure to perform fast approximate nearest neighbor searches. We demonstrate the effectiveness and versatility of our system through two key case studies: first, in plagiarism detection, demonstrating the effectiveness of our system in identifying similar or duplicate documents within a large source code dataset; then, in memory artifact detection, showing its scalability and performance in processing large-scale forensic data collected from various versions of Microsoft Windows. Our comprehensive evaluation shows that \texttt{Apotheosis} not only efficiently handles large datasets, but also provides a way to evaluate the performance of various SDA and their approximate similarity search in different forensic scenarios.},
  note      = {DFRWS USA 2025 - Selected Papers from the 25th Annual Digital Forensics Research Conference USA},
  keywords  = {Approximate matching, hash lookup, similarity digest algorithms, Similarity hashing, similarity search},
  pubstate  = {published},
  tppubtype = {article}
}
Abascal, León; Rodríguez, Ricardo J.
Poster: Extracting Cryptographic Keys from Windows Live Processes Proceedings Article
In: Egele, Manuel; Moonsamy, Veelasha; Gruss, Daniel; Carminati, Michele (Ed.): Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, pp. 213–219, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-97620-9.
Abstract | Links | BibTeX | Tags: cryptography, digital forensics, malware, Windows
@inproceedings{AbascalR-DIMVA-25,
  title     = {Poster: Extracting Cryptographic Keys from {Windows} Live Processes},
  author    = {Abascal, León and Rodríguez, Ricardo J.},
  editor    = {Egele, Manuel and Moonsamy, Veelasha and Gruss, Daniel and Carminati, Michele},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/AbascalR-DIMVA-25.pdf},
  doi       = {10.1007/978-3-031-97620-9_12},
  isbn      = {978-3-031-97620-9},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment},
  volume    = {15748},
  pages     = {213--219},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {Cryptographic keys are a fundamental aspect of modern system security, but when compromised, they become a critical vulnerability, especially in ransomware attacks. Paradoxically, these keys must be available in memory at runtime to function, creating a unique opportunity for defensive tools. We introduce nameTool, an open-source tool designed to locate cryptographic keys in active Windows processes using advanced memory analysis. Unlike traditional approaches that rely on static memory dumps, nameTool performs dynamic analysis in real time, restricting the search to process heap memory to improve efficiency and accuracy. It employs robust key identification heuristics to minimize false positives and is designed for seamless integration with Endpoint Detection and Response systems. nameTool also encourages extensibility: its open-source nature allows researchers and practitioners to enhance its capabilities with custom key detection algorithms. We validated our approach through extensive experiments involving both proof-of-concept ransomware and real-world samples, demonstrating the effectiveness of key extraction and decryption success. Our tool provides a practical path to strengthening ransomware mitigation strategies.},
  keywords  = {cryptography, digital forensics, malware, Windows},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains Proceedings Article
In: Egele, Manuel; Moonsamy, Veelasha; Gruss, Daniel; Carminati, Michele (Ed.): Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, pp. 86–92, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-97623-0.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
@inproceedings{PelayoBenedetRG-DIMVA-25,
  title     = {Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  author    = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  editor    = {Egele, Manuel and Moonsamy, Veelasha and Gruss, Daniel and Carminati, Michele},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-DIMVA-25.pdf},
  doi       = {10.1007/978-3-031-97623-0_5},
  isbn      = {978-3-031-97623-0},
  year      = {2025},
  date      = {2025-01-01},
  booktitle = {Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment},
  volume    = {15748},
  pages     = {86--92},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  abstract  = {Domain generation algorithms enable resilient malware communication by generating pseudo-random domain names. While traditional detection relies on task-specific algorithms, the use of Large Language Models (LLMs) to identify Algorithmically Generated Domains (AGDs) remains largely unexplored. This work evaluates nine LLMs from four major vendors in a zero-shot environment, without fine-tuning. The results show that LLMs can distinguish AGDs from legitimate domains, but they often exhibit a bias, leading to high false positive rates and overconfident predictions. Adding linguistic features offers minimal accuracy gains while increasing complexity and errors. These findings highlight both the promise and limitations of LLMs for AGD detection, indicating the need for further research before practical implementation.},
  keywords  = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
RAMPAGE: A Software Framework To Ensure Reproducibility in Algorithmically Generated Domains Detection Journal Article
In: Expert Systems With Applications, vol. 293, pp. 128629, 2025, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: algorithmically generated domains detection, evaluation, machine learning models, malware, neural network models
@article{PelayoBenedetRG-ESWA-25,
  title     = {{RAMPAGE}: A Software Framework To Ensure Reproducibility in Algorithmically Generated Domains Detection},
  author    = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-ESWA-25.pdf},
  doi       = {10.1016/j.eswa.2025.128629},
  issn      = {0957-4174},
  year      = {2025},
  date      = {2025-12-01},
  urldate   = {2025-12-01},
  journal   = {Expert Systems With Applications},
  volume    = {293},
  pages     = {128629},
  abstract  = {As part of its life cycle, malware can establish communication with its command and control server. To bypass static protection techniques, such as blocking certain IPs in firewalls or DNS server deny lists, malware can use \emph{algorithmically generated domains} (AGD). Many different solutions based on deep learning have been proposed during the last years to detect this type of domains. However, there is a lack of ability to compare the proposed models because there is no common framework that allows experiments to be replicated under the same conditions. Each previous work shows its evaluation results, but under different experimentation conditions and even with different datasets. In this paper, we address this gap by proposing a software framework, dubbed \textsc{RAMPAGE} (\emph{fRAMework to comPAre aGd dEtectors}), focused on training and comparing machine learning models for AGD detection. Furthermore, we propose a new model that uses logistic regression and, using \textsc{RAMPAGE} to obtain a fair comparison with different state-of-the-art models, achieves slightly better results than those obtained so far. In addition, the dataset built from real-world samples for evaluation, as well as the source code of \textsc{RAMPAGE}, are also publicly released to facilitate its use and promote experimental reproducibility in this research field.},
  keywords  = {algorithmically generated domains detection, evaluation, machine learning models, malware, neural network models},
  pubstate  = {published},
  tppubtype = {article}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains Journal Article
In: Journal of Information Security and Applications, vol. 93, pp. 104176, 2025, ISSN: 2214-2134.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
@article{PelayoBenedetRG-JISA-25,
  title     = {The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  author    = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-JISA-25.pdf},
  doi       = {10.1016/j.jisa.2025.104176},
  issn      = {2214-2134},
  year      = {2025},
  date      = {2025-09-01},
  journal   = {Journal of Information Security and Applications},
  volume    = {93},
  pages     = {104176},
  abstract  = {Algorithmically Generated Domains (AGDs) are integral to many modern malware campaigns, allowing adversaries to establish resilient command and control channels. While machine learning techniques are increasingly employed to detect AGDs, the potential of Large Language Models (LLMs) in this domain remains largely underexplored. In this paper, we examine the ability of nine commercial LLMs to identify malicious AGDs, without parameter tuning or domain-specific training. We evaluate zero-shot approaches and few-shot learning approaches, using minimal labeled examples and diverse datasets with multiple prompt strategies. Our results show that certain LLMs can achieve detection accuracy between 77.3\% and 89.3\%. In a 10-shot classification setting, the largest models excel at distinguishing between malware families, particularly those employing hash-based generation schemes, underscoring the promise of LLMs for advanced threat detection. However, significant limitations arise when these models encounter real-world DNS traffic. Performance degradation on benign but structurally suspect domains highlights the risk of false positives in operational environments. This shortcoming has real-world consequences for security practitioners, given the need to avoid erroneous domain blocking that disrupt legitimate services. Our findings underscore the practicality of LLM-driven AGD detection, while emphasizing key areas where future research is needed (such as more robust warning design and model refinement) to ensure reliability in production environments.},
  keywords  = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate  = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.
A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis Journal Article
In: Data in Brief, 2025, ISSN: 2352-3409, (Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries
@article{HuiciR-DIB-25b,
  title     = {A Dataset of {Windows} System Binaries and Similarity Digests for Enhanced Forensic Analysis},
  author    = {Huici, Daniel and Rodríguez, Ricardo J.},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciR-DIB-25.pdf},
  issn      = {2352-3409},
  year      = {2025},
  date      = {2025-01-01},
  journal   = {Data in Brief},
  abstract  = {Similarity digest algorithms, such as TLSH, ssdeep, or sdhash, to name a few, generate intermediate representations (i.e., digests) of digital artifacts to efficiently identify similar objects and measure their degree of similarity. This dataset provides the results of a static analysis performed on system binary files extracted from multiple versions of the Windows operating system, accompanied by their similarity digests. An automated static analysis process was applied to all extracted binaries to decompose them into individual functions and capture detailed metadata for each of them. Specifically, similarity hashes (in particular, TLSH, ssdeep, and LZJD) were computed to enable forensic analysts to effectively assess artifact similarities. The dataset serves as an “allow list” of legitimate Windows artifacts, allowing forensic analysts to detect deviations from trusted binaries, verify system integrity, perform software audits, and improve malware detection efforts. This paper describes the structure of the dataset, the methodology and tools used in its creation, and its value for forensic analysis and cybersecurity investigation.},
  note      = {Accepted for publication. To appear.},
  keywords  = {Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries},
  pubstate  = {forthcoming},
  tppubtype = {article}
}
Raducu, Razvan; Rodríguez, Ricardo J.; Álvarez, Pedro
MalGraphIQ: A Tool for Generating Behavior Representations of Malware Execution Traces Journal Article
In: SoftwareX, vol. 32, pp. 102407, 2025, ISSN: 2352-7110.
Abstract | Links | BibTeX | Tags: Behavioral Patterns, Comparative Malware Analysis, Execution traces, Malware Analysis, Visual Analytics
@article{Raducu2025a,
  title     = {{MalGraphIQ}: A Tool for Generating Behavior Representations of Malware Execution Traces},
  author    = {Raducu, Razvan and Rodríguez, Ricardo J. and Álvarez, Pedro},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/RaducuRA-SoftwareX-25.pdf},
  doi       = {10.1016/j.softx.2025.102407},
  issn      = {2352-7110},
  year      = {2025},
  date      = {2025-12-01},
  urldate   = {2025-12-01},
  journal   = {SoftwareX},
  volume    = {32},
  pages     = {102407},
  abstract  = {Understanding and interpreting malware behavior remains an open challenge in the field of cybersecurity. The dynamic analysis of malware execution traces has emerged as a promising approach for discovering behavioral insights that allow the visual explanation of malware activity. \textsc{MalGraphIQ} is an open-source tool for the analysis and visualization of malware behavior. It is based on a structured and hierarchical taxonomy of API-based behavior patterns, which facilitates the interpretation of malware objectives, strategies, and low-level interactions with the attacked system. These interpretations support the comparative analysis of collections of suspicious programs, particularly across malware families and types, enhancing security research, malware triage, and the development of behavior-aware detection systems.},
  keywords  = {Behavioral Patterns, Comparative Malware Analysis, Execution traces, Malware Analysis, Visual Analytics},
  pubstate  = {published},
  tppubtype = {article}
}
Raducu, Razvan; Villagrasa-Labrador, Alain; Rodríguez, Ricardo J.; Álvarez, Pedro
A Dataset of Windows Malware Execution Traces Journal Article
In: Data in Brief, vol. 63, pp. 112273, 2025, ISSN: 2352-3409.
Abstract | Links | BibTeX | Tags: Behavioral execution trace, Malware dynamic analysis, System calls, Windows API
@article{Raducu2025b,
  title     = {A Dataset of {Windows} Malware Execution Traces},
  author    = {Raducu, Razvan and Villagrasa-Labrador, Alain and Rodríguez, Ricardo J. and Álvarez, Pedro},
  url       = {https://webdiis.unizar.es/~ricardo/files/papers/RaducuVRA-DIB-25.pdf},
  doi       = {10.1016/j.dib.2025.112273},
  issn      = {2352-3409},
  year      = {2025},
  date      = {2025-12-01},
  journal   = {Data in Brief},
  volume    = {63},
  pages     = {112273},
  abstract  = {Malware continues to be a major cybersecurity concern, with increasing volume and sophistication making effective detection methods essential. Behavior-based approaches rely on high-quality execution trace data to analyze how malicious software interacts with systems during runtime. Publicly available datasets often lack sufficient detail, contain limited family diversity, or provide only simplified API call sequences. In this paper, we present a dataset that addresses this gap by offering a large collection of richly detailed Windows malware execution traces generated in controlled environments. It has been generated through automated dynamic analysis, executing the malware samples in a controlled virtualized environment, specifically, in the CAPEv2 Sandbox on Windows 10 virtual machines. The raw sandbox analysis reports have been then processed using the MALVADA framework, a modular Python-based pipeline that filters, structures, labels, and standardizes execution traces. The resulting dataset consists of 31,844 JSON execution trace files where each trace contains static metadata, dynamic behavioral information, and labelling fields. The dataset is suitable for reuse in multiple research contexts, including the development and benchmarking of malware detection methods, behavioral clustering, dynamic analysis of malicious software, and automated labelling studies. Its standardized JSON structure facilitates integration with existing data analysis and machine learning pipelines, as well as combination with other datasets for extended studies.},
  keywords  = {Behavioral execution trace, Malware dynamic analysis, System calls, Windows API},
  pubstate  = {published},
  tppubtype = {article}
}
Uroz, Daniel; Rodríguez, Ricardo J.; Gañán, Carlos H.
Poster: Empirical Analysis of Lifespan Increase of IoT C&C Domains Proceedings Article
In: Proceedings of the 2024 ACM on Internet Measurement Conference, pp. 767–768, Association for Computing Machinery, Madrid, Spain, 2024, ISBN: 9798400705922.
Abstract | Links | BibTeX | Tags: c&c lifetime, iot malware
@inproceedings{UrozRG-IMC-24,
  title     = {Poster: Empirical Analysis of Lifespan Increase of {IoT} {C\&C} Domains},
  author    = {Uroz, Daniel and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  doi       = {10.1145/3646547.3689670},
  isbn      = {9798400705922},
  year      = {2024},
  date      = {2024-01-01},
  booktitle = {Proceedings of the 2024 ACM on Internet Measurement Conference},
  pages     = {767--768},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  venue     = {Madrid, Spain},
  series    = {IMC '24},
  abstract  = {The increasing prevalence of Internet of Things (IoT) devices have made them attractive targets for malware, highlighting the critical need to understand the dynamics of IoT Command and Control (C\&C). While previous research observed short-lived C\&Cs, recent observations indicate that the lifespan of domain names linked to IoT botnets is extending, deviating from previously recorded survival rates. To understand and characterize this emerging trend, we collected and examined 1049 IoT malware samples from late 2022 to early 2023, identifying 549 unique domains contacted by these samples. Domains were classified as malicious if detected by VirusTotal or followed a Domain Generation Algorithm pattern. Using data from WhoisXMLAPI and DNSDB Scout, we analyzed registration information and historical DNS resolutions, and identified relationships. Our findings reveal that the majority of C\&C domains belong to Qsnatch and Mirai malware families, with an average lifespan of 2.7 years. Notably, seven active domains had an average lifespan of 5.7 years. We also observed a significant number of domains under the .vg and .ws TLDs, but with lack of passive DNS and registration information.},
  keywords  = {c&c lifetime, iot malware},
  pubstate  = {published},
  tppubtype = {inproceedings}
}