Carrillo-Mondéjar, Javier; Rodríguez, Ricardo J.
Identifying Runtime Libraries in Statically Linked Linux Binaries Journal Article
In: Future Generation Computer Systems, vol. 164, pp. 107602, 2025, ISSN: 0167-739X.
Abstract | Links | BibTeX | Tags: Binary code analysis, IoT, malware, Runtime library identification, Statically linked binaries
@article{CarrilloR-FGCS-25,
  author = {Carrillo-Mondéjar, Javier and Rodríguez, Ricardo J.},
  title = {Identifying Runtime Libraries in Statically Linked {Linux} Binaries},
  journal = {Future Generation Computer Systems},
  volume = {164},
  pages = {107602},
  year = {2025},
  date = {2025-01-01},
  issn = {0167-739X},
  doi = {10.1016/j.future.2024.107602},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/CarrilloR-FGCS-25.pdf},
  abstract = {Vulnerabilities in unpatched applications can originate from third-party dependencies in statically linked applications, as they must be relinked each time to take advantage of libraries that have been updated to fix any vulnerability. Despite this, malware binaries are often statically linked to ensure they run on target platforms and to complicate malware analysis. In this sense, identification of libraries in malware analysis becomes crucial to help filter out those library functions and focus on malware function analysis. In this paper, we introduce \texttt{MANTILLA}, a system for identifying runtime libraries in statically linked Linux-based binaries. Our system is based on radare2 to identify functions and extract their features (independent of the underlying architecture of the binary) through static binary analysis and on the K-nearest neighbors supervised machine learning model and a majority rule to predict final values. \texttt{MANTILLA} is evaluated on a dataset consisting of binaries built for different architectures (\texttt{MIPSeb}, \texttt{ARMel}, \texttt{Intel x86}, and \texttt{Intel x86-64}) and different runtime libraries (\texttt{uClibc}, \texttt{glibc}, and \texttt{musl}), achieving very high accuracy. We also evaluate it in two case studies. First, using a dataset of binary files belonging to the \texttt{binutils} collection and second, using an IoT malware dataset. In both cases, good accuracy results are obtained both in terms of runtime library detection ($94.4\%$ and $95.5\%$, respectively) and architecture identification ($100\%$ and $98.6\%$, respectively).},
  keywords = {Binary code analysis, IoT, malware, Runtime library identification, Statically linked binaries},
  pubstate = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.; Mena, Eduardo
APOTHEOSIS: An efficient approximate similarity search system Journal Article
In: SoftwareX, vol. 29, pp. 102016, 2025, ISSN: 2352-7110.
Abstract | Links | BibTeX | Tags: Approximate K-nearest neighbors, Approximate matching, Approximate search methods, Data similarity analysis, similarity digest algorithms
@article{HuiciRM-SoftX-25,
  author = {Huici, Daniel and Rodríguez, Ricardo J. and Mena, Eduardo},
  title = {{APOTHEOSIS}: An efficient approximate similarity search system},
  journal = {SoftwareX},
  volume = {29},
  pages = {102016},
  year = {2025},
  date = {2025-02-01},
  urldate = {2025-02-01},
  issn = {2352-7110},
  doi = {10.1016/j.softx.2024.102016},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciRM-SoftX-25.pdf},
  abstract = {APOTHEOSIS is a tool for efficiently identifying and comparing data similarity in large datasets, addressing challenges faced by traditional methods such as scalability and speed. APOTHEOSIS overcomes them by combining advanced algorithms and data structures, enabling fast and accurate similarity analysis. Specifically, it uses a custom hierarchical small navigation world as an approximate $K$-nearest neighbors search method, and approximate similarity digests algorithms to find common features between similar data items, also supporting various distance metrics beyond vector-based approaches. Our software tool is designed for seamless integration into research workflows, improving reproducibility and facilitating the comparison of large-scale, high-dimensional data comparison across multiple domains.},
  keywords = {Approximate K-nearest neighbors, Approximate matching, Approximate search methods, Data similarity analysis, similarity digest algorithms},
  pubstate = {published},
  tppubtype = {article}
}
Raducu, Razvan; Villagrasa-Labrador, Alain; Rodríguez, Ricardo J.; Álvarez, Pedro
MALVADA: A Framework for Generating Datasets of Malware Execution Traces Journal Article
In: SoftwareX, vol. 30, pp. 102082, 2025, ISSN: 2352-7110.
Abstract | Links | BibTeX | Tags: Dataset generation, Execution traces, Malware behavior, Malware classification
@article{RaducuVRA-SoftwareX-25,
  author = {Raducu, Razvan and Villagrasa-Labrador, Alain and Rodríguez, Ricardo J. and Álvarez, Pedro},
  title = {{MALVADA}: A Framework for Generating Datasets of Malware Execution Traces},
  journal = {SoftwareX},
  volume = {30},
  pages = {102082},
  year = {2025},
  date = {2025-05-01},
  issn = {2352-7110},
  doi = {10.1016/j.softx.2025.102082},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/RaducuVRA-SoftwareX-25.pdf},
  abstract = {Malware attacks have been growing steadily in recent years, making more sophisticated detection methods necessary. These approaches typically rely on analyzing the behavior of malicious applications, for example by examining execution traces that capture their runtime behavior. However, many existing execution trace datasets are simplified, often resulting in the omission of relevant contextual information, which is essential to capture the full scope of a malware sample’s behavior. This paper introduces MALVADA, a flexible framework designed to generate extensive datasets of execution traces from Windows malware. These traces provide detailed insights into program behaviors and help malware analysts to classify a malware sample. MALVADA facilitates the creation of large datasets with minimal user effort, as demonstrated by the WinMET dataset, which includes execution traces from approximately 10,000 Windows malware samples.},
  keywords = {Dataset generation, Execution traces, Malware behavior, Malware classification},
  pubstate = {published},
  tppubtype = {article}
}
Santos Filho, Ailton; Rodríguez, Ricardo J.; Feitosa, Eduardo L.
Automated broken object-level authorization attack detection in REST APIs through OpenAPI to colored petri nets transformation Journal Article
In: International Journal of Information Security, vol. 24, no. 2, pp. 83, 2025, ISSN: 1615-5270.
Abstract | Links | BibTeX | Tags: Broken access control, Colored Petri nets, OpenAPI, RESTful web services, Security analysis, vulnerabilities, Web application security
@article{SantosFilhoRF-IJIS-25,
  author = {Santos Filho, Ailton and Rodríguez, Ricardo J. and Feitosa, Eduardo L.},
  title = {Automated broken object-level authorization attack detection in {REST} {APIs} through {OpenAPI} to colored petri nets transformation},
  journal = {International Journal of Information Security},
  volume = {24},
  number = {2},
  pages = {83},
  year = {2025},
  date = {2025-02-01},
  issn = {1615-5270},
  doi = {10.1007/s10207-024-00970-5},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/SantosFilhoRF-IJIS-25.pdf},
  abstract = {The representational state transfer architectural style (REST) specifies a set of rules for creating web services. In REST, data and functionality are considered resources, accessed, and manipulated using a uniform, well-defined set of rules. RESTful web services are web services that follow the REST architectural style and are exposed to the Internet using RESTful APIs. Most of them are described by OpenAPI, a standard language-independent interface for RESTful APIs. RESTful APIs are continuously available on the Internet and are therefore a common target for cyberattacks. To prevent vulnerabilities and reduce risks in web systems, there are several security guidelines available, such as those provided by the Open Web Application Security Project (OWASP) foundation. A common vulnerability in web services is broken object level authorization (BOLA), which allows an attacker to modify or delete data or perform actions intended only for authorized users. For example, an attacker can change an order status, delete a user account, or add unauthorized data to the server. In this paper, we propose a transformation from OpenAPI to Petri nets, which enables formal modeling and analysis of REST APIs using existing Petri net analysis techniques to detect potential security risks directly from the analysis of web server logs. In addition, we also provide a tool, named Links2CPN, which automatically performs model transformation (taking the OpenAPI specification as input) and BOLA attack detection by analyzing web server execution traces. We apply it to a case study of a vulnerable web application to demonstrate its applicability. Our results show that it is capable of detecting BOLA attacks with an accuracy greater than 95\% in the proposed scenarios.},
  keywords = {Broken access control, Colored Petri nets, OpenAPI, RESTful web services, Security analysis, vulnerabilities, Web application security},
  pubstate = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.; Mena, Eduardo
An Extensible and Scalable System for Hash Lookup and Approximate Similarity Search with Similarity Digest Algorithms Journal Article
In: Forensic Science International: Digital Investigation, vol. 53, pp. 301930, 2025, ISSN: 2666-2817, (DFRWS USA 2025 - Selected Papers from the 25th Annual Digital Forensics Research Conference USA).
Abstract | Links | BibTeX | Tags: Approximate matching, hash lookup, similarity digest algorithms, Similarity hashing, similarity search
@article{HuiciRM-FSIDI-25,
  author = {Huici, Daniel and Rodríguez, Ricardo J. and Mena, Eduardo},
  title = {An Extensible and Scalable System for Hash Lookup and Approximate Similarity Search with Similarity Digest Algorithms},
  journal = {Forensic Science International: Digital Investigation},
  volume = {53},
  pages = {301930},
  year = {2025},
  date = {2025-07-01},
  issn = {2666-2817},
  doi = {10.1016/j.fsidi.2025.301930},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciRM-FSIDI-25.pdf},
  abstract = {Efficient management and analysis of large volumes of digital data has emerged as a major challenge in the field of digital forensics. To quickly identify and analyze relevant artifacts within large datasets, we introduce \texttt{Apotheosis}, an approximate similarity search system designed for scalability and efficiency. Our system integrates approximate search techniques (which allow searching for a match on a close value) with Similarity Digest Algorithms (SDA; which capture common features between similar elements), using a space-saving radix tree and a graph-based hierarchical navigable small world structure to perform fast approximate nearest neighbor searches. We demonstrate the effectiveness and versatility of our system through two key case studies: first, in plagiarism detection, demonstrating the effectiveness of our system in identifying similar or duplicate documents within a large source code dataset; then, in memory artifact detection, showing its scalability and performance in processing large-scale forensic data collected from various versions of Microsoft Windows. Our comprehensive evaluation shows that \texttt{Apotheosis} not only efficiently handles large datasets, but also provides a way to evaluate the performance of various SDA and their approximate similarity search in different forensic scenarios.},
  note = {DFRWS USA 2025 - Selected Papers from the 25th Annual Digital Forensics Research Conference USA},
  keywords = {Approximate matching, hash lookup, similarity digest algorithms, Similarity hashing, similarity search},
  pubstate = {published},
  tppubtype = {article}
}
Abascal, León; Rodríguez, Ricardo J.
Poster: Extracting Cryptographic Keys from Windows Live Processes Proceedings Article
In: Egele, Manuel; Moonsamy, Veelasha; Gruss, Daniel; Carminati, Michele (Ed.): Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, pp. 213–219, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-97620-9.
Abstract | Links | BibTeX | Tags: cryptography, digital forensics, malware, Windows
@inproceedings{AbascalR-DIMVA-25,
  author = {Abascal, León and Rodríguez, Ricardo J.},
  title = {Poster: Extracting Cryptographic Keys from {Windows} Live Processes},
  editor = {Egele, Manuel and Moonsamy, Veelasha and Gruss, Daniel and Carminati, Michele},
  booktitle = {Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment},
  series = {Lecture Notes in Computer Science},
  volume = {15748},
  pages = {213--219},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  year = {2025},
  date = {2025-01-01},
  isbn = {978-3-031-97620-9},
  doi = {10.1007/978-3-031-97620-9_12},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/AbascalR-DIMVA-25.pdf},
  abstract = {Cryptographic keys are a fundamental aspect of modern system security, but when compromised, they become a critical vulnerability, especially in ransomware attacks. Paradoxically, these keys must be available in memory at runtime to function, creating a unique opportunity for defensive tools. We introduce nameTool, an open-source tool designed to locate cryptographic keys in active Windows processes using advanced memory analysis. Unlike traditional approaches that rely on static memory dumps, nameTool performs dynamic analysis in real time, restricting the search to process heap memory to improve efficiency and accuracy. It employs robust key identification heuristics to minimize false positives and is designed for seamless integration with Endpoint Detection and Response systems. nameTool also encourages extensibility: its open-source nature allows researchers and practitioners to enhance its capabilities with custom key detection algorithms. We validated our approach through extensive experiments involving both proof-of-concept ransomware and real-world samples, demonstrating the effectiveness of key extraction and decryption success. Our tool provides a practical path to strengthening ransomware mitigation strategies.},
  keywords = {cryptography, digital forensics, malware, Windows},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains Proceedings Article
In: Egele, Manuel; Moonsamy, Veelasha; Gruss, Daniel; Carminati, Michele (Ed.): Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment, pp. 86–92, Springer Nature Switzerland, Cham, 2025, ISBN: 978-3-031-97623-0.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
@inproceedings{PelayoBenedetRG-DIMVA-25,
  author = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  title = {Poster: Exploring the Zero-Shot Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  editor = {Egele, Manuel and Moonsamy, Veelasha and Gruss, Daniel and Carminati, Michele},
  booktitle = {Proceedings of the 22nd International Conference on Detection of Intrusions and Malware, and Vulnerability Assessment},
  series = {Lecture Notes in Computer Science},
  volume = {15748},
  pages = {86--92},
  publisher = {Springer Nature Switzerland},
  address = {Cham},
  year = {2025},
  date = {2025-01-01},
  isbn = {978-3-031-97623-0},
  doi = {10.1007/978-3-031-97623-0_5},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-DIMVA-25.pdf},
  abstract = {Domain generation algorithms enable resilient malware communication by generating pseudo-random domain names. While traditional detection relies on task-specific algorithms, the use of Large Language Models (LLMs) to identify Algorithmically Generated Domains (AGDs) remains largely unexplored. This work evaluates nine LLMs from four major vendors in a zero-shot environment, without fine-tuning. The results show that LLMs can distinguish AGDs from legitimate domains, but they often exhibit a bias, leading to high false positive rates and overconfident predictions. Adding linguistic features offers minimal accuracy gains while increasing complexity and errors. These findings highlight both the promise and limitations of LLMs for AGD detection, indicating the need for further research before practical implementation.},
  keywords = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
RAMPAGE: A Software Framework To Ensure Reproducibility in Algorithmically Generated Domains Detection Journal Article
In: Expert Systems With Applications, vol. 293, pp. 128629, 2025, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: algorithmically generated domains detection, evaluation, machine learning models, malware, neural network models
@article{PelayoBenedetRG-ESWA-25,
  author = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  title = {{RAMPAGE}: A Software Framework To Ensure Reproducibility in Algorithmically Generated Domains Detection},
  journal = {Expert Systems With Applications},
  volume = {293},
  pages = {128629},
  year = {2025},
  date = {2025-12-01},
  urldate = {2025-12-01},
  issn = {0957-4174},
  doi = {10.1016/j.eswa.2025.128629},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-ESWA-25.pdf},
  abstract = {As part of its life cycle, malware can establish communication with its command and control server. To bypass static protection techniques, such as blocking certain IPs in firewalls or DNS server deny lists, malware can use \emph{algorithmically generated domains} (AGD). Many different solutions based on deep learning have been proposed during the last years to detect this type of domains. However, there is a lack of ability to compare the proposed models because there is no common framework that allows experiments to be replicated under the same conditions. Each previous work shows its evaluation results, but under different experimentation conditions and even with different datasets. In this paper, we address this gap by proposing a software framework, dubbed \textsc{RAMPAGE} (\emph{fRAMework to comPAre aGd dEtectors}), focused on training and comparing machine learning models for AGD detection. Furthermore, we propose a new model that uses logistic regression and, using \textsc{RAMPAGE} to obtain a fair comparison with different state-of-the-art models, achieves slightly better results than those obtained so far. In addition, the dataset built from real-world samples for evaluation, as well as the source code of \textsc{RAMPAGE}, are also publicly released to facilitate its use and promote experimental reproducibility in this research field.},
  keywords = {algorithmically generated domains detection, evaluation, machine learning models, malware, neural network models},
  pubstate = {published},
  tppubtype = {article}
}
Pelayo-Benedet, Tomás; Rodríguez, Ricardo J.; Gañán, Carlos H.
The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains Journal Article
In: Journal of Information Security and Applications, vol. 93, pp. 104176, 2025, ISSN: 2214-2134.
Abstract | Links | BibTeX | Tags: Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection
@article{PelayoBenedetRG-JISA-25,
  author = {Pelayo-Benedet, Tomás and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  title = {The Machines are Watching: Exploring the Potential of Large Language Models for Detecting Algorithmically Generated Domains},
  journal = {Journal of Information Security and Applications},
  volume = {93},
  pages = {104176},
  year = {2025},
  date = {2025-09-01},
  issn = {2214-2134},
  doi = {10.1016/j.jisa.2025.104176},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/PelayoBenedetRG-JISA-25.pdf},
  abstract = {Algorithmically Generated Domains (AGDs) are integral to many modern malware campaigns, allowing adversaries to establish resilient command and control channels. While machine learning techniques are increasingly employed to detect AGDs, the potential of Large Language Models (LLMs) in this domain remains largely underexplored. In this paper, we examine the ability of nine commercial LLMs to identify malicious AGDs, without parameter tuning or domain-specific training. We evaluate zero-shot approaches and few-shot learning approaches, using minimal labeled examples and diverse datasets with multiple prompt strategies. Our results show that certain LLMs can achieve detection accuracy between 77.3\% and 89.3\%. In a 10-shot classification setting, the largest models excel at distinguishing between malware families, particularly those employing hash-based generation schemes, underscoring the promise of LLMs for advanced threat detection. However, significant limitations arise when these models encounter real-world DNS traffic. Performance degradation on benign but structurally suspect domains highlights the risk of false positives in operational environments. This shortcoming has real-world consequences for security practitioners, given the need to avoid erroneous domain blocking that disrupt legitimate services. Our findings underscore the practicality of LLM-driven AGD detection, while emphasizing key areas where future research is needed (such as more robust warning design and model refinement) to ensure reliability in production environments.},
  keywords = {Algorithmically Generated Domains, DNS Traffic Analysis, Large Language Models, Malware Detection},
  pubstate = {published},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.
A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis Journal Article
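In: Data in Brief, 2025, ISSN: 2352-3409, (Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries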
@article{HuiciR-DIB-25,
  author = {Huici, Daniel and Rodríguez, Ricardo J.},
  title = {A Dataset of {Windows} System Binaries and Similarity Digests for Enhanced Forensic Analysis},
  journal = {Data in Brief},
  year = {2025},
  date = {2025-01-01},
  issn = {2352-3409},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciR-DIB-25.pdf},
  abstract = {Similarity digest algorithms, such as TLSH, ssdeep, or sdhash, to name a few, generate intermediate representations (i.e., digests) of digital artifacts to efficiently identify similar objects and measure their degree of similarity. This dataset provides the results of a static analysis performed on system binary files extracted from multiple versions of the Windows operating system, accompanied by their similarity digests. An automated static analysis process was applied to all extracted binaries to decompose them into individual functions and capture detailed metadata for each of them. Specifically, similarity hashes (in particular, TLSH, ssdeep, and LZJD) were computed to enable forensic analysts to effectively assess artifact similarities. The dataset serves as an “allow list” of legitimate Windows artifacts, allowing forensic analysts to detect deviations from trusted binaries, verify system integrity, perform software audits, and improve malware detection efforts. This paper describes the structure of the dataset, the methodology and tools used in its creation, and its value for forensic analysis and cybersecurity investigation.},
  note = {Accepted for publication. To appear.},
  keywords = {Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries},
  pubstate = {forthcoming},
  tppubtype = {article}
}
Huici, Daniel; Rodríguez, Ricardo J.
A Dataset of Windows System Binaries and Similarity Digests for Enhanced Forensic Analysis Journal Article
In: Data in Brief, 2025, ISSN: 2352-3409, (Accepted for publication. To appear.).
Abstract | Links | BibTeX | Tags: Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries
@article{HuiciR-DIB-25b,
  author = {Huici, Daniel and Rodríguez, Ricardo J.},
  title = {A Dataset of {Windows} System Binaries and Similarity Digests for Enhanced Forensic Analysis},
  journal = {Data in Brief},
  year = {2025},
  date = {2025-01-01},
  issn = {2352-3409},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/HuiciR-DIB-25.pdf},
  abstract = {Similarity digest algorithms, such as TLSH, ssdeep, or sdhash, to name a few, generate intermediate representations (i.e., digests) of digital artifacts to efficiently identify similar objects and measure their degree of similarity. This dataset provides the results of a static analysis performed on system binary files extracted from multiple versions of the Windows operating system, accompanied by their similarity digests. An automated static analysis process was applied to all extracted binaries to decompose them into individual functions and capture detailed metadata for each of them. Specifically, similarity hashes (in particular, TLSH, ssdeep, and LZJD) were computed to enable forensic analysts to effectively assess artifact similarities. The dataset serves as an “allow list” of legitimate Windows artifacts, allowing forensic analysts to detect deviations from trusted binaries, verify system integrity, perform software audits, and improve malware detection efforts. This paper describes the structure of the dataset, the methodology and tools used in its creation, and its value for forensic analysis and cybersecurity investigation.},
  note = {Accepted for publication. To appear.},
  keywords = {Approximate matching, forensic artifacts, Malware Detection, operating system Windows, Similarity digest algorithm, Static Analysis, system binaries},
  pubstate = {forthcoming},
  tppubtype = {article}
}
Uroz, Daniel; Rodríguez, Ricardo J.; Gañán, Carlos H.
Poster: Empirical Analysis of Lifespan Increase of IoT C&C Domains Proceedings Article
In: Proceedings of the 2024 ACM on Internet Measurement Conference, pp. 767–768, Association for Computing Machinery, Madrid, Spain, 2024, ISBN: 9798400705922.
Abstract | Links | BibTeX | Tags: c&c lifetime, iot malware
@inproceedings{UrozRG-IMC-24,
  author = {Uroz, Daniel and Rodríguez, Ricardo J. and Gañán, Carlos H.},
  title = {Poster: Empirical Analysis of Lifespan Increase of {IoT} {C\&C} Domains},
  booktitle = {Proceedings of the 2024 ACM on Internet Measurement Conference},
  series = {IMC '24},
  pages = {767--768},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  venue = {Madrid, Spain},
  year = {2024},
  date = {2024-01-01},
  isbn = {9798400705922},
  doi = {10.1145/3646547.3689670},
  abstract = {The increasing prevalence of Internet of Things (IoT) devices have made them attractive targets for malware, highlighting the critical need to understand the dynamics of IoT Command and Control (C\&C). While previous research observed short-lived C\&Cs, recent observations indicate that the lifespan of domain names linked to IoT botnets is extending, deviating from previously recorded survival rates. To understand and characterize this emerging trend, we collected and examined 1049 IoT malware samples from late 2022 to early 2023, identifying 549 unique domains contacted by these samples. Domains were classified as malicious if detected by VirusTotal or followed a Domain Generation Algorithm pattern. Using data from WhoisXMLAPI and DNSDB Scout, we analyzed registration information and historical DNS resolutions, and identified relationships. Our findings reveal that the majority of C\&C domains belong to Qsnatch and Mirai malware families, with an average lifespan of 2.7 years. Notably, seven active domains had an average lifespan of 5.7 years. We also observed a significant number of domains under the .vg and .ws TLDs, but with lack of passive DNS and registration information.},
  keywords = {c&c lifetime, iot malware},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Cambronero, María Emilia; Martínez, Miguel A.; Llana, Luis; Rodríguez, Ricardo J.; Russo, Alejandro
Towards a GDPR-compliant cloud architecture with data privacy controlled through sticky policies Journal Article
In: PeerJ Computer Science, vol. 10:e1898, pp. 1–44, 2024.
Abstract | Links | BibTeX | Tags: Cloud computing, Data privacy, Data tracking, General data protection regulation, Model validation, Object Constraint Language, Sticky policies, UML profiling, Unified Modeling Language
@article{CMLRR-PeerJ-24,
  author = {Cambronero, María Emilia and Martínez, Miguel A. and Llana, Luis and Rodríguez, Ricardo J. and Russo, Alejandro},
  title = {Towards a {GDPR}-compliant cloud architecture with data privacy controlled through sticky policies},
  journal = {PeerJ Computer Science},
  volume = {10},
  eid = {e1898},
  pages = {1--44},
  year = {2024},
  date = {2024-03-01},
  doi = {10.7717/peerj-cs.1898},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/CMLRR-PeerJ-24.pdf},
  abstract = {Data privacy is one of the biggest challenges facing system architects at the system design stage. Especially when certain laws, such as the General Data Protection Regulation (GDPR), have to be complied with by cloud environments. In this article, we want to help cloud providers comply with the GDPR by proposing a GDPR-compliant cloud architecture. To do this, we use model-driven engineering techniques to design cloud architecture and analyze cloud interactions. In particular, we develop a complete framework, called MDCT, which includes a Unified Modeling Language profile that allows us to define specific cloud scenarios and profile validation to ensure that certain required properties are met. The validation process is implemented through the Object Constraint Language (OCL) rules, which allow us to describe the constraints in these models. To comply with many GDPR articles, the proposed cloud architecture considers data privacy and data tracking, enabling safe and secure data management and tracking in the context of the cloud. For this purpose, sticky policies associated with the data are incorporated to define permission for third parties to access the data and track instances of data access. As a result, a cloud architecture designed with MDCT contains a set of OCL rules to validate it as a GDPR-compliant cloud architecture. Our tool models key GDPR points such as user consent/withdrawal, the purpose of access, and data transparency and auditing, and considers data privacy and data tracking with the help of sticky policies.},
  keywords = {Cloud computing, Data privacy, Data tracking, General data protection regulation, Model validation, Object Constraint Language, Sticky policies, UML profiling, Unified Modeling Language},
  pubstate = {published},
  tppubtype = {article}
}
Carrillo-Mondéjar, Javier; Suárez-Tangil, Guillermo; Costin, Andrei; Rodríguez, Ricardo J.
Exploring Shifting Patterns in Recent IoT Malware Proceedings Article
In: Proceedings of the 23rd European Conference on Cyber Warfare and Security (ECCWS), pp. 96–106, ACI, 2024.
Abstract | Links | BibTeX | Tags: Dynamic Analysis, Malware Evolution, Malware IoT, Malware lineage, Static Analysis
@inproceedings{CSCR-ECCWS-24b,
  author = {Carrillo-Mondéjar, Javier and Suárez-Tangil, Guillermo and Costin, Andrei and Rodríguez, Ricardo J.},
  title = {Exploring Shifting Patterns in Recent {IoT} Malware},
  booktitle = {Proceedings of the 23rd European Conference on Cyber Warfare and Security (ECCWS)},
  volume = {23},
  number = {1},
  pages = {96--106},
  publisher = {ACI},
  year = {2024},
  date = {2024-07-01},
  doi = {10.34190/eccws.23.1.2280},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/CSCR-ECCWS-24.pdf},
  abstract = {The rise of malware targeting interconnected infrastructures has surged in recent years, driven largely by the widespread presence of vulnerable legacy IoT devices and inadequately secured networks. Despite the strong interest attackers have in targeting this infrastructure, a significant gap remains in understanding how the landscape has recently evolved. Addressing this knowledge gap is essential to thwarting the proliferation of massive botnets, thereby safeguarding end-users and preventing disruptions in critical infrastructures. This work offers a contemporary analysis of Linux-based malware, specifically tailored to IoT malware operating in 2021-2023. Using automated techniques involving both static and dynamic analysis, we classify malware into related threats. By scrutinizing the most recent dataset of Linux-based malware and comparing it to previous studies, we unveil distinctive insights into emerging trends, offering an unparalleled understanding of the evolving landscape. Although Mirai and Gafgyt remain the most prominent families and present a large number of variants, our results show that (i) there is an increase in the sophistication of malware, (ii) malware authors are adding new exploits to their arsenal, and (iii) malware families that originally attacked Windows systems have been adapted to attack Linux-based devices.},
  keywords = {Dynamic Analysis, Malware Evolution, Malware IoT, Malware lineage, Static Analysis},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Gutiérrez Mlot, Esteban Damián; Saldana, Jose; Rodríguez, Ricardo J.; Kotsiuba, Igor; Gañán, Carlos H.
A dataset to train intrusion detection systems based on machine learning models for electrical substations Journal Article
In: Data in Brief, vol. 57, pp. 111153, 2024, ISSN: 2352-3409.
Abstract | Links | BibTeX | Tags: critical infrastructure, cybersecurity, IEC104, IEC60870-5-104, IEC61850, testbed
@article{MlotSRKG-DIB-24,
  author = {Gutiérrez Mlot, Esteban Damián and Saldana, Jose and Rodríguez, Ricardo J. and Kotsiuba, Igor and Gañán, Carlos H.},
  title = {A dataset to train intrusion detection systems based on machine learning models for electrical substations},
  journal = {Data in Brief},
  volume = {57},
  pages = {111153},
  year = {2024},
  date = {2024-12-01},
  issn = {2352-3409},
  doi = {10.1016/j.dib.2024.111153},
  url = {https://webdiis.unizar.es/~ricardo/files/papers/GutierrezMlotSRKG-DIB-24.pdf},
  abstract = {The growing integration of Information and Communication Technology into Operational Technology environments in electrical substations exposes them to new cybersecurity threats. This paper presents a comprehensive dataset of substation traffic, aimed at improving the training and benchmarking of Intrusion Detection Systems (IDS) installed in these facilities that are based on machine learning techniques. The dataset includes raw network captures and flows from real substations, filtered and anonymized to ensure privacy. It covers the main protocols and standards used in substation environments: IEC61850, IEC104, NTP, and PTP. Additionally, the dataset includes traces obtained during several cyberattacks, which were simulated in a controlled laboratory environment, providing a rich resource for developing and testing machine learning models for cybersecurity applications in substations. A set of complementary tools for dataset creation and preprocessing are also included to standardize the methodology, ensuring consistency and reproducibility. In summary, the dataset addresses the critical need for high-quality, targeted data for tuning IDS at electrical substations and contributes to the advancement of secure and reliable power distribution networks.},
  keywords = {critical infrastructure, cybersecurity, IEC104, IEC60870-5-104, IEC61850, testbed},
  pubstate = {published},
  tppubtype = {article}
}