Saeed

PENDULUM: Assessing Sycophancy in MLLMs
Description: “dataset contains 751 images, each paired with approximately 2,000 visual question–answer (VQA) examples. The images span six distinct domains, each presenting different visual and reasoning challenges. For each question, three prompt types are included: a baseline (equilibrium) prompt with no user influence, a positive-influence prompt that provides a correct hint aligned with the visual evidence, and a negative-influence prompt that is misleading or adversarial. ”
[Dataset] [Input Prompts] [BibTeX] [PDF]

@misc{Rahman2025PENDULUM,
  author        = {Rahman, A. B. M. Ashikur and Anwar, Saeed and Usman, Muhammad and Ahmad, Irfan and Mian, Ajmal},
  title         = {{PENDULUM}: A Benchmark for Assessing Sycophancy in Multimodal Large Language Models},
  year          = {2025},
  eprint        = {2512.19350},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2512.19350},
}

IARD: Intruder Activity Recognition Dataset
Description: “IARD contains four classes — Armed Intruder, Door Kick, Intruder Inside, and Lock Breaking. Together, these classes span more than 69,000 frames. The dataset was built from carefully selected YouTube videos, cropped and manually annotated into the target activity classes, with standardized 224×224 frames. IARD was built with strict ethical safeguards: all videos were sourced from public sources, faces were anonymized, no identifying metadata was retained, and any sensitive or inappropriate content was removed. The dataset follows responsible AI and privacy principles.”
[Dataset] [BibTeX] [PDF]

@inproceedings{Ali2025IARD,
  author    = {Ali, Shehzad and Islam, Md Tanvir and Lee, Ik Hyun and Anwar, Saeed and Del Ser, Javier and Muhammad, Khan},
  title     = {{IARD}: Intruder Activity Recognition Dataset for Threat Detection},
  booktitle = {Proceedings of the 34th {ACM} International Conference on Information and Knowledge Management},
  pages     = {6317--6322},
  numpages  = {6},
  year      = {2025},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9798400720406},
  doi       = {10.1145/3746252.3761619},
  url       = {https://doi.org/10.1145/3746252.3761619},
}

HazeSpace2M: Hazy Image Dataset
Description: “a collection of over 2 million images designed to enhance dehazing through haze type classification. HazeSpace2M includes diverse scenes with 10 haze intensity levels, featuring Fog, Cloud, and Environmental Haze.”
[Dataset] [BibTeX] [PDF]

@inproceedings{Tanvir2024hazespace2m,
  author    = {Islam, Md Tanvir and Rahim, Nasir and Anwar, Saeed and Saqib, Muhammad and Bakshi, Sambit and Muhammad, Khan},
  title     = {{HazeSpace2M}: A Dataset for Haze Aware Single Image Dehazing},
  booktitle = {Proceedings of the 32nd {ACM} International Conference on Multimedia},
  year      = {2024},
  doi       = {10.1145/3664647.3681382},
}

LoLI-Street: Low-light Images of Streets
Description: “The training consists of 30k, while validation has 3k paired low and high-light images. Moreover, we collected high-resolution videos (4K/8K at 60fps) from various cities under low-light conditions, extracting and manually reviewing frames to create the Real Low-light Testset (RLLT) of our LoLI-Street dataset. We used Photoshop v25.0 to generate the synthetic images of our dataset.”
[Dataset (kaggle)] [Dataset (Google Drive)] [BibTeX] [PDF]

@inproceedings{Islam2024loli,
  author    = {Islam, Md Tanvir and Alam, Inzamamul and Woo, Simon S. and Anwar, Saeed and Lee, Ik Hyun and Muhammad, Khan},
  title     = {{LoLI-Street}: Benchmarking Low-Light Image Enhancement and Beyond},
  booktitle = {Proceedings of the Asian Conference on Computer Vision ({ACCV})},
  year      = {2024},
}

DefAn: Definitive Answer Dataset
Description: “DefAn is a comprehensive evaluation benchmark dataset, with more than 75000 samples, designed to assess the hallucination tendencies of large language models (LLMs). The dataset is categorized into eight knowledge domains: Sports, Census Australia, Nobel Prize, Entertainment, World Organizations, QS Ranking, Conference Venue, and Math. The dataset is structured into two parts: public and hidden.”
[Dataset] [BibTeX] [PDF]

@misc{rahman2024defan,
  author        = {Rahman, A. B. M. Ashikur and Anwar, Saeed and Usman, Muhammad and Mian, Ajmal},
  title         = {{DefAn}: Definitive Answer Dataset for {LLMs} Hallucination Evaluation},
  year          = {2024},
  eprint        = {2406.09155},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2406.09155},
}

SSID: Sequential Storytelling Image Dataset
Description: “Visual Storytelling Task (VST) takes a set of images as input and aims to generate a coherent story relevant to the input images. We collected and manually annotated images from publicly available documentaries, lifestyles, and movies. In summary, the SSID dataset has 17,365 images of 3,473 unique sets of five images. Each set of images is associated with four ground truths, resulting in 13,892 unique ground truths (i.e., written stories). Each ground truth is composed of five connected sentences written as a story.”
[Dataset] [BibTeX] [PDF]

@article{Malakan2023SSID,
  author  = {Malakan, Zainy M. and Anwar, Saeed and Hassan, Ghulam Mubashar and Mian, Ajmal},
  title   = {Sequential Vision to Language as Story: A Storytelling Dataset and Benchmarking},
  journal = {{IEEE} Access},
  year    = {2023},
  volume  = {11},
  doi     = {10.1109/ACCESS.2023.3293646},
}

Swan: A Point Cloud Dataset
Description: “A large-scale outdoor point cloud dataset captured with LiDAR sensor and annotated manually for semantic segmentation, instance segmentation and object detection.”
[Dataset] [BibTeX] [PDF]

@article{Ibrahim2023SWAN,
  author  = {Ibrahim, Muhammad and Akhtar, Naveed and Anwar, Saeed and Mian, Ajmal},
  title   = {{SAT3D}: Slot Attention Transformer for {3D} Point Cloud Semantic Segmentation},
  journal = {{IEEE} Transactions on Intelligent Transportation Systems},
  year    = {2023},
  pages   = {1--11},
}

Perth-WA Localization: A Point Cloud Dataset
Description: “LiDAR 3D point cloud map of Perth, Western Australia is provided with ground truth locations and orientations.”
[Dataset] [BibTeX] [PDF]

@inproceedings{Ibrahim2023slice,
  author    = {Ibrahim, Muhammad and Akhtar, Naveed and Anwar, Saeed and Wise, Michael and Mian, Ajmal},
  title     = {Slice Transformer and Self-supervised Learning for {6DoF} Localization in {3D} Point Cloud Maps},
  booktitle = {{IEEE} International Conference on Robotics and Automation ({ICRA})},
  year      = {2023},
}

FGBR: Fine Grained Butterflies and Reef Fish Dataset
Description: “FGBR contains two species, butterfly and reef fish. The butterfly dataset has 50 species, consisting of 2613 images, most of which are ecological. The reef fish also has 50 species. Most of the 3825 images in the reef fish dataset are ecological.”
[Dataset] [BibTeX] [PDF]

@article{Anwar2022FGBR,
  author  = {Anwar, Abbas and Anwar, Hafeez and Anwar, Saeed},
  title   = {Towards Low-Cost Classification for Novel Fine-Grained Datasets},
  journal = {Electronics},
  year    = {2022},
}

ANUBIS: Australian National University Benchmarking Indoor Skeleton
Description: “ANUBIS is a large-scale human skeleton dataset containing 80 actions. Compared with previously collected datasets, ANUBIS is advantageous in the following four aspects: (1) employing more recently released sensors; (2) containing novel back view; (3) encouraging high enthusiasm of subjects; (4) including actions of the COVID pandemic era.”
[Dataset] [BibTeX] [PDF]

@misc{Qin2022ANUBIS,
  author        = {Qin, Zhenyue and Liu, Yang and Perera, Madhawa and Gedeon, Tom and Ji, Pan and Kim, Dongwoo and Anwar, Saeed},
  title         = {{ANUBIS}: Skeleton Action Recognition Dataset, Review, and Benchmark},
  year          = {2022},
  eprint        = {2205.02071},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2205.02071},
}

DVLPD: Diverse Vehicle and License Plates Dataset
Description: “We collect a novel dataset that has 10k images of Pakistani vehicles. These vehicles are trucks, buses, vans, carry vans, and cars. There are two sub-classes of trucks called type1 trucks having a single axle and type2 trucks with double axles. After dataset collection, the pre-processing steps and ground truth generation are performed to train and test the CNN models.”
[Dataset] [BibTeX] [PDF]

@misc{Usama2022VTLPR,
  author        = {Usama, Muhammad and Anwar, Hafeez and Shahid, Muhammad Muaz and Anwar, Abbas and Anwar, Saeed and Hlavacs, Helmuth},
  title         = {Vehicle and License Plate Recognition with Novel Dataset for Toll Collection},
  year          = {2022},
  eprint        = {2202.05631},
  archivePrefix = {arXiv},
  primaryClass  = {eess.IV},
  url           = {https://arxiv.org/abs/2202.05631},
}

HICRD: Heron Island Coral Reef Dataset
Description: “Heron Island Coral Reef Dataset (HICRD) contains 6003 low-quality images, 3673 good-quality images, and 2000 restored images. We use low-quality images and restored images as the unpaired training set (trainA + trainB). In contrast, the paired training set contains good-quality (trainA_paired) images and corresponding restored images (trainB_paired). The test set contains 300 good-quality images (testA) as well as 300 paired restored images (testB) as ground truth. All images are in 1842 x 980 resolution. The copyright belongs to CSIRO (Commonwealth Scientific and Industrial Research Organisation).”
[Dataset] [BibTeX] [arXiv]

@misc{Han2021CWR,
  author        = {Han, Junlin and Shoeiby, Mehrdad and Malthus, Tim and Botha, Elizabeth and Anstee, Janet and Anwar, Saeed and Wei, Ran and Armin, Mohammad Ali and Li, Hongdong and Petersson, Lars},
  title         = {Underwater Image Restoration via Contrastive Learning and a Real-world Dataset},
  year          = {2021},
  eprint        = {2106.10718},
  archivePrefix = {arXiv},
  primaryClass  = {eess.IV},
  url           = {https://arxiv.org/abs/2106.10718},
}

RRCD: Roman Republican Coin Dataset
Description: “Based on Crawford’s work, we collect the most diverse and extensive image dataset of the reverse sides. For most of the Roman Republic coin classes, the ob- verse side depicts more discriminative information than the observe side. Our dataset has 228 motif classes, including 100 classes that are the main classes for training and testing, which we call the main dataset RRCD-Main. The images of the additional 128 classes constitute the disjoint test set, RRCD-Disjoint, which we allocate to assess the generalization ability of our models.,”
[Dataset] [BibTeX] [PDF] [arXiv]

@article{Anwar2021CoinNet,
  author  = {Anwar, Hafeez and Anwar, Saeed and Zambanini, Sebastian and Porikli, Fatih},
  title   = {Deep Ancient {Roman} {Republican} Coin Classification via Feature Fusion and Attention},
  journal = {Pattern Recognition},
  pages   = {107871},
  year    = {2021},
  issn    = {0031-3203},
  doi     = {10.1016/j.patcog.2021.107871},
  url     = {https://www.sciencedirect.com/science/article/pii/S0031320321000583},
}

NCD: Natural-Color Dataset
Description: “The Natural-Color Dataset (NCD) is an image colorization dataset where images are true to their colors. For example, a carrot will have an orange color in most images. Bananas will be either greenish or yellowish. It contains 723 images from the internet distributed in 20 categories. Each image has an object and a white background.”
[Dataset (GrayScale Images)] [Dataset (Color GroundTruth)] [BibTeX] [PDF]

@misc{Anwar2020ColorSurvey,
  author        = {Anwar, Saeed and Tahir, Muhammad and Li, Chongyi and Mian, Ajmal and Khan, Fahad Shahbaz and Muzaffar, Abdul Wahab},
  title         = {Image Colorization: A Survey and Dataset},
  year          = {2020},
  eprint        = {2008.10774},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2008.10774},
}

UWSD: UnderWater Synthetic Dataset
Description: “To synthesize underwater image degradation datasets, we use the attenuation coefficients described in Table 1 for the different water types of oceanic and coastal classes (i.e., I, IA, IB, II, and III for open ocean waters, and 1, 3, 5, 7, and 9 for coastal waters). Type-I is the clearest and Type-III is the most turbid open ocean water. Similarly, for coastal waters, Type-1 is the clearest and Type-9 is the most turbid. We apply Eqs (1) and (2) (please check the paper) to build ten types of underwater image datasets by using the RGB-D NYU-v2 indoor dataset which consists of 1449 images. To improve the quality of datasets, we crop the original size (480x640) of NYU-v2 to 460x620. This dataset is for non-commercial use only. The size of each dataset is 1.2GB”
[Dataset] [BibTeX] [PR Version] [PDF]

@article{Anwar2019UWE,
  author    = {Li, Chongyi and Anwar, Saeed},
  title     = {Underwater Scene Prior Inspired Deep Underwater Image and Video Enhancement},
  journal   = {Pattern Recognition},
  publisher = {Elsevier},
  year      = {2019},
  pages     = {107038},
}

SMD: Synthetic Monocular Depth
Description: “ Synthetic Monocular Depth generation code,”
[BibTeX] [PDF] [arXiv] [Code]

@article{Anwar2021DefocusMVA,
  author    = {Anwar, Saeed and Hayder, Zeeshan and Porikli, Fatih},
  title     = {Deblur and deep depth from single defocus image},
  journal   = {Machine Vision and Applications},
  publisher = {Springer},
  year      = {2021},
  volume    = {32},
  number    = {1},
  pages     = {1--13},
}