@article{10804123,
  author   = {Seyedfaraji, Saeed and Shakibhamedan, Salar and Seyedfaraji, Amire and Mesgari, Baset and Taherinejad, Nima and Jantsch, Axel and Rehman, Semeen},
  title    = {{E-MAC}: Enhanced In-{SRAM} {MAC} Accuracy via Digital-to-Time Modulation},
  journal  = {IEEE Journal on Exploratory Solid-State Computational Devices and Circuits},
  year     = {2024},
  volume   = {10},
  pages    = {178--186},
  keywords = {Voltage;Circuits;Accuracy;Transistors;Discharges (electric);Complexity theory;Energy efficiency;Microprocessors;Memory management;Logic gates;6T-static random access memory (SRAM);convolutional neural network (CNN);image classification;processing in memory (PIM)},
  doi      = {10.1109/JXCDC.2024.3518633},
}
In this article, we introduce a novel enhanced multiplication and accumulation (MAC) technique called E-MAC, aimed at enhancing energy efficiency, reducing latency, and improving the accuracy of analog-based in-static random access memory (SRAM) MAC accelerators. Our approach involves a digital-to-time word-line (WL) modulation technique that encodes the WL voltage while preserving the necessary linear voltage drop for precise computations. This eliminates the need for an additional digital-to-analog converter (DAC) in the design. Furthermore, the SRAM-based logical weight encoding scheme we present reduces the reliance on capacitance-based techniques, which typically introduce area overhead in the circuit. This approach ensures consistent voltage drops for all equivalent cases [i.e., (a×b)=(b×a)], addressing a persistent issue in existing state-of-the-art methods. Compared with state-of-the-art analog-based in-SRAM techniques, our E-MAC approach demonstrates significant energy savings (1.89×) and improved accuracy (73.25%) per MAC computation from a 1-V power supply, while achieving an 11.84× energy efficiency improvement over baseline digital approaches. Our application analysis shows a marginal overall reduction in accuracy, i.e., a 0.1% and 0.17% reduction for LeNet5-based CNN and VGG16, respectively, when trained on the MNIST and ImageNet datasets.
@article{P2AAC_2024,
  author  = {Simonides, Christian and Gausepohl, Dominik and Hinkel, Peter and Seiler, Fabian and Taherinejad, Nima},
  title   = {Approximated 2-Bit Adders for Parallel In-Memristor Computing with a Novel Sum-of-Product Architecture},
  journal = {IEEE Journal on Exploratory Solid-State Computational Devices and Circuits},
  year    = {2024},
  doi     = {10.1109/jxcdc.2024.3497720},
}
Conventional computing methods struggle with the exponentially increasing demand for computational power, caused by applications including image processing and Machine Learning (ML). Novel computing paradigms such as In-Memory Computing (IMC) and Approximate Computing (AxC) provide promising solutions to this problem. Due to their low energy consumption and inherent ability to store data in a non-volatile fashion, memristors are an increasingly popular choice in these fields. There is a wide range of logic forms compatible with memristive IMC, each offering different advantages. We present a novel mixed-logic solution that utilizes properties of the Sum-of-Products (SOP) representation and propose a full-adder circuit that works efficiently in 2-bit units. To further improve the speed, area usage, and energy consumption, we propose two additional Approximate (Ax) 2-bit adders that exhibit inherent parallelization capabilities. We apply the proposed adders in selected image processing applications, where our Ax approach reduces the energy consumption by 31%–40% and improves the speed by 50%. To demonstrate the potential gains of our approximations in more complex applications, we applied them in ML. Our experiments indicate that with up to 6/16 Ax adders there is no accuracy degradation when applied in a Convolutional Neural Network (CNN) that is evaluated on MNIST. Our approach can save up to 125.6 mJ of energy and 505 million steps compared to our exact approach.
@inproceedings{seiler2025serial,
  author    = {Seiler, Fabian and TaheriNejad, Nima},
  title     = {An Improved Serial {IMPLY} Adder Algorithm for Efficient Neural Network Applications},
  booktitle = {2025 IEEE 16th Latin America Symposium on Circuits and Systems (LASCAS)},
  year      = {2025},
  pages     = {1--5},
  keywords  = {Adder;Energy-efficient;IMPLY Logic;In-Memory Computing;Memristor;Neural Networks},
}
Memristive systems are one of the most promising candidates for a post-CMOS era. They are small, energy-efficient, and are ideal targets for In-Memory Computation (IMC) via stateful logic. As adders are critical building blocks for any computing system, improving them is an essential design goal. With the rise of Artificial Intelligence (AI), providing memristive adders that are optimized for Neural Networks (NNs) is extremely important. For this, we propose a Material Implication (IMPLY) based adder algorithm in the serial topology that can preserve the weights in memory, which was not addressed in the State-of-the-Art (SoA). Our approach is 20%–23% faster and requires 1%–12% less energy when the adder is used repeatedly. We propose a flowchart for IMPLY-based algorithms that can represent the state changes of individual memristors and apply it to our adder. We embed our adder in a shift-and-add multiplier and evaluate the potential gains on the 8-bit quantized ResNet18. Our approach is up to 17% more energy-efficient and requires up to 20% fewer cycles for the inference than the SoA adder.
@inproceedings{shakibhamedan2024an,
  author    = {Shakibhamedan, Salar and Jahanjoo, Anice and Aminifar, Amin and Amirafshar, Nima and TaheriNejad, Nima and Jantsch, Axel},
  title     = {An Analytical Approach to Enhancing {DNN} Efficiency and Accuracy Using Approximate Multiplication},
  booktitle = {2nd Workshop on Advancing Neural Network Training: Computational Efficiency, Scalability, and Resource Optimization (WANT@ICML 2024)},
  year      = {2024},
  url       = {https://openreview.net/forum?id=rver7enVfY},
}
Achieving higher accuracy in Deep Neural Networks (DNNs) often reaches a plateau despite extensive training, retraining, and fine-tuning. This paper introduces an analytical study using approximate multipliers to investigate potential accuracy improvements. Leveraging the principles of the Information Bottleneck (IB) theory, we analyze the enhanced information and feature extraction capabilities provided by approximate multipliers. Through Information Plane (IP) analysis, we gain a detailed understanding of DNN behavior under this approach. Our analysis indicates that this technique can break through existing accuracy barriers while offering computational and energy efficiency benefits. Compared to traditional methods that are computationally intensive, our approach uses less demanding optimization techniques. Additionally, approximate multipliers contribute to reduced energy consumption during both the training and inference phases. Experimental results support the potential of this method, suggesting it is a promising direction for DNN optimization.
@misc{seiler2024atomic,
  author        = {Seiler, Fabian and TaheriNejad, Nima},
  title         = {{ATOMIC}: Automatic Tool for Memristive {IMPLY}-based Circuit-level Simulation and Validation},
  year          = {2024},
  eprint        = {2410.15893},
  archivePrefix = {arXiv},
  primaryClass  = {cs.ET},
  url           = {https://arxiv.org/abs/2410.15893},
}
Since performance improvements of computers are stagnating, new technologies and computer paradigms are hot research topics. Memristor-based In-Memory Computing is one of the promising candidates for the post-CMOS era, which comes in many flavors. Processing In memory Array (PIA), or using memory, is one of them; it is a relatively new approach and substantially different from traditional CMOS-based logic design. Consequently, there is a lack of publicly available CAD tools for memristive PIA design and evaluation. Here, we present ATOMIC: an Automatic Tool for Memristive IMPLY-based Circuit-level Simulation and Validation. Using our tool, a large portion of the simulation, evaluation, and validation process can be performed automatically, drastically reducing the development time for memristive PIA systems, in particular those using IMPLY logic. The code is available at https://github.com/fabianseiler/ATOMIC.
@article{10670396,
  author        = {Chatur, Ameya and Haghi, Mostafa and Ganapathy, Nagarajan and Taherinejad, Nima and Seepold, Ralf and Mart{\'\i}nez Madrid, Natividad},
  title         = {Advanced Classifiers and Feature Reduction for Accurate Insomnia Detection Using Multimodal Dataset},
  journal       = {IEEE Access},
  year          = {2024},
  pages         = {1--1},
  keywords      = {Heart rate variability;Support vector machines;Sleep;Principal component analysis;Accuracy;Feature extraction;Resonant frequency;Detection algorithms;Actigraphy;classification;feature reduction;heart rate variability;insomnia},
  doi           = {10.1109/ACCESS.2024.3456904},
  internal-note = {pages 1--1 looks like the publisher's early-access placeholder; replace with volume and final pagination once confirmed},
}
Sleep deprivation is a significant contributor to various diseases, leading to poor cognitive function, decreased performance, and heart disorders. Insomnia, the most prevalent sleep disorder, requires more effective diagnosis and screening for proper treatment. Actigraphic data and its combination with physiological sensors like electroencephalogram (EEG), electrocardiogram (ECG), and body temperature have proven significant in predicting insomnia using machine learning methods. Studies focusing solely on actigraphic data achieved an accuracy of 84%, combining it with other wearable devices increased accuracy to 88%, and 2-channel EEG alone yielded an accuracy of 92%, but this limits scalability and practicality in real-world settings. Here we show that using the hybrid approach of incorporating both recursive feature elimination (RFE) and principal component analysis (PCA) on sleep and heart data features yields outstanding results, with the multi-layer perceptron (MLP) achieving an accuracy of 95.83% and an F1 score of 0.93. The top-ranked features are predominantly sleep-related and time-domain RR interval. Our findings emphasize the importance of tailoring feature sets and employing appropriate reduction techniques for optimal predictive modeling in sleep-related studies. Our results demonstrate that the ensemble classifiers generalize well on the dataset regardless of the feature count, while other algorithms are hindered by the curse of dimensionality.
@article{tohidinejad2024designing,
  author    = {Tohidinejad, Zahra and Danyali, Saeed and Valizadeh, Majid and Seepold, Ralf and TaheriNejad, Nima and Haghi, Mostafa},
  title     = {Designing a Hybrid Energy-Efficient Harvesting System for Head- or Wrist-Worn Healthcare Wearable Devices},
  journal   = {Sensors},
  year      = {2024},
  volume    = {24},
  number    = {16},
  pages     = {5219},
  publisher = {MDPI},
}
Battery power is crucial for wearable devices as it ensures continuous operation, which is critical for real-time health monitoring and emergency alerts. One solution for long-lasting monitoring is energy harvesting systems. Ensuring a consistent energy supply from variable sources for reliable device performance is a major challenge. Additionally, integrating energy harvesting components without compromising the wearability, comfort, and esthetic design of healthcare devices presents a significant bottleneck. Here, we show that with a meticulous design using small and highly efficient photovoltaic (PV) panels, compact thermoelectric (TEG) modules, and two ultra-low-power BQ25504 DC-DC boost converters, the battery life can increase from 9.31 h to over 18 h. The parallel connection of boost converters at two points of the output allows both energy sources to individually achieve maximum power point tracking (MPPT) during battery charging. We found that under specific conditions such as facing the sun for more than two hours, the device became self-powered. Our results demonstrate the long-term and stable performance of the sensor node with an efficiency of 96%. Given the high-power density of solar cells outdoors, a combination of PV and TEG energy can harvest energy quickly and sufficiently from sunlight and body heat. The small form factor of the harvesting system and the environmental conditions of particular occupations such as the oil and gas industry make it suitable for health monitoring wearables worn on the head, face, or wrist region, targeting outdoor workers.
@article{haghi2024evolution,
  author    = {Haghi, Mostafa and Gaiduk, Maksym and Stoffers, Marvin and TaheriNejad, Nima and Penzel, Thomas and Mart{\'\i}nez Madrid, Natividad and Seepold, Ralf},
  title     = {Evolution of Bed-Based Sensor Technology in Unobtrusive Sleep Monitoring: A Review},
  journal   = {IEEE Sensors Journal},
  year      = {2024},
  publisher = {IEEE},
}
With the emergence of new sensor technologies, such as fiber optic sensors (FOSs), compared to traditional mechanical sensors, unobtrusive sleep monitoring has been a research focus for decades. This work aims to provide a guide to current bed-based sensor technologies with diverse applications in various settings. We conducted a retrospective literature review, summarizing the state-of-the-art research over the past decade on non-contact bed-based sensor technology in sleep monitoring. We developed a three-category terminology: unobtrusive sensor technology, application, and subject. A total of 263 unique articles were acquired from three databases and screened for relevance, resulting in 21 papers selected for in-depth analysis. The findings revealed eight types of sensors: six mechanical sensors (pressure, accelerometer, piezoelectric, load cell, electromechanical film (EMFI), and hydraulic) and two FOSs (fiber Bragg grating and microbend FOS) that are integrated with or positioned under the bed at three levels of unobtrusiveness. We identified 15 parameters, with heart rate (14) and respiratory rate (13) being the most frequently measured. These parameters are generally categorized into three applications: disease-related diagnosis (18), general sleep analysis (9), and general well-being (11). The results indicated that sleep apnea (5) and insomnia (2) were the most frequently detected sleep disorders. Additionally, 59.1% (13) of the systems were tested in a lab environment, with only one undergoing clinical trials. In summary, there is a clear lack of convincing proof of the systems’ effectiveness in continuous in-home sleep monitoring.
@misc{traunmuller2024wearablehealthcaredevicesmonitoring,
  author        = {Traunmuller, Peter and Jahanjoo, Anice and Khooyooz, Soheil and Aminifar, Amin and TaheriNejad, Nima},
  title         = {Wearable Healthcare Devices for Monitoring Stress and Attention Level in Workplace Environments},
  year          = {2024},
  eprint        = {2406.05813},
  archivePrefix = {arXiv},
  primaryClass  = {cs.HC},
  url           = {https://arxiv.org/abs/2406.05813},
}
Battery power is crucial for wearable devices in physiological parameter measurement as it ensures continuous operation and reliability, critical for real-time health monitoring and emergency alerts. One of the solutions used to supply low-power wearable medical devices is energy harvesting systems. In this work, a hybrid photovoltaic-thermoelectric (PV-TEG) energy harvesting system is proposed. It utilizes small and highly efficient PV panels, compact TEG modules, and two ultra-low-power BQ25504 DC-DC boost converters to enable efficient operation of the energy harvesting system. Given the high-power density of solar cells outdoors, a combination of PV and TEG energy is utilized to quickly and sufficiently harvest energy from sunlight and body heat. The boost converters are connected in parallel at two points of the output, allowing both energy sources to individually achieve maximum power point tracking (MPPT) during battery charging and loading. The system is designed to be compatible with wearables for health monitoring in the head, face, or wrist region, targeting people engaged in activities outdoors, such as workers in the oil and gas industry. The node is designed to include a photoplethysmogram (PPG), an accelerometer, and a low-power microcontroller, with an average power consumption of approximately 106.29 mW. The power source for the node is a rechargeable lithium-polymer battery, though with a limited lifespan (9.31 hours). With the new PV/TEG energy harvesting system, the battery life increased to more than 18 hours. Under particular conditions, i.e., facing the sun for more than two hours, the device turned into a self-powered wearable device. The system was experimentally tested outdoors at a temperature of 25 °C and an irradiance of 1000 W/m². Experimental results demonstrated the long-term and stable performance of the sensor node with an efficiency of 96%.
@article{202403.1487,
  author    = {Zahra Tohidinejad and Saeed Danyali and Majid Valizadeh and Ralf Seepold and Nima TaheriNejad and Mostafa Haghi},
  title     = {Designing an Efficient Hybrid Energy Harvesting System Compatible with Head/Wrist-Worn Medical Wearable Devices},
  journal   = {Preprints},
  year      = {2024},
  month     = mar,
  publisher = {Preprints},
  doi       = {10.20944/preprints202403.1487.v1},
  url       = {https://doi.org/10.20944/preprints202403.1487.v1},
}
Wearable devices have revolutionized healthcare monitoring, allowing us to track physiological conditions without disrupting daily routines. Whereas monitoring physical health and physical activities have been widely studied, their application and impact on mental health are significantly understudied. This work reviews the state-of-the-art, focusing on stress and concentration levels. These two can play an important role in workplace humanization. For instance, they can guide breaks in high-pressure workplaces, indicating when and how long to take. Those are important to avoid overwork and burn-out, harming employees and employers. To this end, it is necessary to study which sensors can accurately determine stress and attention levels, considering that they should not interfere with their activities and be comfortable to wear. From the software point of view, it is helpful to know the capabilities and performance of various algorithms, especially for uncontrolled workplace environments. This work aims to research, review, and compare commercially available non-intrusive measurement devices, which can be worn during the day and possibly integrated with healthcare systems for stress and concentration assessment. We analyze the performance of various algorithms used for stress and concentration level assessment and discuss future paths for reliable detection of these two parameters.
@article{seiler2024SSAxIMC,
  author   = {Seiler, Fabian and TaheriNejad, Nima},
  title    = {Efficient Image Processing via Memristive-based Approximate {In-Memory Computing}},
  journal  = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year     = {2024},
  pages    = {1--12},
  keywords = {Approximate;Memristor;In-Memory Computing;IMPLY;Image Processing},
  doi      = {10.1109/TCAD.2024.3438113},
}
Image processing algorithms continue to demand higher performance from computers. However, computer performance is not improving at the same rate as before. In response to the current challenges in enhancing computing performance, a wave of new technologies and computing paradigms is surfacing. Among these, memristors stand out as one of the most promising components due to their technological prospects and low power consumption. With efficient data storage capabilities and their ability to directly perform logical operations within the memory, they are well-suited for In-Memory Computation (IMC). Approximate computing emerges as another promising paradigm, offering improved performance metrics, notably speed. The trade-off for this gain is the reduction of accuracy. In this paper, we are using the stateful logic Material Implication (IMPLY) in the semi-serial topology and combine both paradigms to further enhance the computational performance. We present three novel approximated adders that drastically improve speed and energy consumption with a Normalized Mean Error Distance (NMED) lower than 0.02 for most scenarios. We evaluated partially approximated Ripple Carry Adder (RCA) at circuit-level and compared them to the State-of-the-Art (SoA). The proposed adders are applied in different image processing applications and the quality metrics are calculated. While maintaining acceptable quality, our approach achieves significant energy savings of 6%-38% and reduces the delay (number of computation cycles) by 5%-35%, demonstrating notable efficiency compared to exact calculations.
@article{seiler2024nocarry,
  author   = {Seiler, Fabian and TaheriNejad, Nima},
  title    = {Accelerated Image Processing Through {IMPLY}-Based {NoCarry} Approximated Adders},
  journal  = {IEEE Transactions on Circuits and Systems I: Regular Papers},
  year     = {2024},
  volume   = {71},
  number   = {11},
  pages    = {5141--5154},
  keywords = {Memristors;Adders;Topology;Logic;Measurement;Image processing;Energy efficiency;Approximate;memristor;in-memory computing;IMPLY;energy efficiency;image processing},
  doi      = {10.1109/TCSI.2024.3426926},
}
As the demand for computational power increases drastically, traditional solutions to address those needs struggle to keep up. Consequently, there has been a proliferation of alternative computing paradigms aimed at tackling this disparity. Approximate Computing (AxC) has emerged as a modern way of improving speed, area efficiency, and energy consumption in error-resilient applications such as image processing or machine learning. The trade-off for these enhancements is the loss in accuracy. From a technology point of view, memristors have garnered significant attention due to their low power consumption and inherent non-volatility that makes them suitable for In-Memory Computation (IMC), another computing paradigm that has risen to tackle the aforementioned disparity between the demand growth and performance improvement. In this work, we leverage a memristive stateful in-memory logic, namely Material Implication (IMPLY). We investigate advanced adder topologies within the context of AxC, aiming to combine the strengths of both of these novel computing paradigms. We present two approximated algorithms for each IMPLY-based adder topology. When embedded in a Ripple Carry Adder (RCA), they reduce the number of steps by 6%–54% and the energy consumption by 7%–54% compared to the corresponding exact full adders. Compared to State-of-the-Art (SoA) approximations at circuit level, our work improves the speed and energy efficiency by up to 72% and 34%, respectively, while lowering the Normalized Mean Error Distance (NMED) by up to 81%. We evaluate our adders in four common image processing applications, for which we introduce two new test datasets as well. When applied to image processing, our proposed adders can reduce the number of steps by up to 60% and the energy consumption by up to 57%, while also improving the quality metrics over the SoA in most cases.
@inproceedings{16732675,
  author    = {Nima TaheriNejad},
  title     = {In-Memory Computing: Global Energy Consumption, Carbon Footprint, Technology, and Products Status Quo},
  booktitle = {2024 IEEE Nano Conference},
  year      = {2024},
  pages     = {1--6},
  keywords  = {In-Memory Computing, Memory Technology, Global Impact, Energy Consumption, Carbon Emission, Industrial Products},
}
In this paper, we highlight and quantify the importance and potential role of In-Memory Computation (IMC) and memory technologies in the future of humans’ global footprint. To this end, we calculate the estimated energy consumption and carbon emission associated with the data movement inside computing systems and put them in perspective using tangible examples. Next, we review various memory technologies as well as their advantages and disadvantages (especially regarding their energy consumption), for usage in computing systems as memory and computing elements. We calculate what their impact is and what would be the potential savings of migrating towards emerging memory technologies. We discuss some of the challenges these emerging memory technologies face, before presenting the highlights of the IMC products on or near the market. This paper aims at providing an insight on the impact of IMC and memory technology on the society at large and clarifying the importance of working on IMC and emerging memory technologies to lower the power consumption and overall footprint of computing systems. The status of IMC products shows that while moving in the right direction, there is a substantial body of work to be done. We hope this will help engineers to better grasp the extent of the impact they can produce and motivate them further in the pursuit of better computing systems.
@inproceedings{ShakibISV2024,
  author    = {Shakibhamedan, Salar and Aminifar, Amin and Vassallo, Luke and TaheriNejad, Nima},
  title     = {Harnessing Approximate Computing for Machine Learning},
  booktitle = {2024 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)},
  year      = {2024},
  pages     = {1--6},
}
This paper explores the integration and application of Approximate Computing (AxC) approaches to Machine Learning (ML), especially Deep Learning (DL) models. We focus on four principal techniques—quantization, approximate multiplication, approximate in-memory computing, and input-dependent AxC. We demonstrate how each contributes to reducing the energy demands of current Artificial Intelligence (AI) systems, while maintaining acceptable levels of computational accuracy. These techniques may be deployed on software or hardware platforms. Quantization and input-dependent techniques can be implemented through software on general-purpose systems, enhancing flexibility and ease of deployment. Approximate multiplier and in-memory computing require specialized hardware integration, e.g., as custom System-on-Chip (SoC) or System-in-Package (SiP) solutions. We also discuss the crucial aspect of reliability, emphasizing robust design and error resilience to ensure the operational integrity of AI applications. By thoroughly examining these AxC techniques, the paper discusses an approach to designing energy-efficient and reliable AI accelerators, especially for SoC/SiP systems, providing essential support for use cases such as mobile and edge devices.
@inproceedings{Khooyooz2024novel,
  author       = {Khooyooz, Soheil and Jahanjoo, Anice and Aminifar, Amin and TaheriNejad, Nima},
  title        = {A Novel Machine-Learning-Based Noise Detection Method for Photoplethysmography Signals},
  booktitle    = {46th Annual International Conference of the IEEE Engineering in Medicine \& Biology Society (EMBC)},
  year         = {2024},
  organization = {IEEE},
}
Wearable devices are widespread for continuous health monitoring, capturing various physiological parameters for remote health monitoring and early detection of health issues. These devices are susceptible to interference such as Motion Artifacts (MA) and Baseline Wanders (BW). Mitigating potential false alarms due to those artifacts is an important challenge in wearable healthcare. To tackle this challenge, it is crucial to first identify noise in the signals recorded by wearable systems. Most of the conventional methods rely on reference data like accelerometer data to detect noise in Photoplethysmogram (PPG) signals. This study proposes a Machine Learning (ML)-based approach to distinguish between clean and corrupted segments in PPG signals without relying on other sensors’ data. Binary and three-class classification on clean, MA-, and BW-corrupted signals produce promising F1-scores from 89.3% to 99.4%. Index Terms—Wearable devices; health monitoring; photoplethysmography; noise detection; motion artifact; baseline wander
@inproceedings{Bieg2024Neurophysiological,
  author    = {Bieg, Till and Gerdenitsch, Cornelia and Sch{\"o}rpf, Philip and Jahanjoo, Anice and Taherinejad, Nima},
  title     = {Neurophysiological Data Collection at the Digital Workplace},
  booktitle = {{NeuroIS} Retreat},
  year      = {2024},
  address   = {Vienna, Austria},
}
Interest in the use of neurophysiological instruments for real-world studies in the workplace is increasing, also intensified by the simultaneously growing use of various commercial self-tracking technologies. However, the application of neurophysiological tools for real-world workplace research is associated with challenges - an aspect that has received little attention in previous research. This article outlines the key challenges encountered when applying neurophysiological measurements in the workplace, drawing on insights gained in an interdisciplinary research project on digital workplaces. We identify challenges along four main themes: technical tool requirements, data processing and interpretation, tool interaction, and organizational collaboration. Additionally, we discuss how these challenges were addressed within our case. As a contribution, this article offers important considerations and recommendations for the effective application of neurophysiological tools in real-world workplace research.
@article{DAMSGAARD2024103114,
  author   = {Hans Jakob Damsgaard and Antoine Grenier and Dewant Katare and Zain Taufique and Salar Shakibhamedan and Tiago Troccoli and Georgios Chatzitsompanis and Anil Kanduri and Aleksandr Ometov and Aaron Yi Ding and Nima Taherinejad and Georgios Karakonstantis and Roger Woods and Jari Nurmi},
  title    = {Adaptive approximate computing in edge {AI} and {IoT} applications: A review},
  journal  = {Journal of Systems Architecture},
  year     = {2024},
  volume   = {150},
  pages    = {103114},
  issn     = {1383-7621},
  doi      = {10.1016/j.sysarc.2024.103114},
  url      = {https://www.sciencedirect.com/science/article/pii/S1383762124000511},
  keywords = {Approximate computing, Autonomous driving, Edge computing, Positioning, Smart sensing},
}
This paper presents the design and development of Signed Carry Disregard Multiplier (SCDM8), a family of signed approximate multipliers tailored for integration into Convolutional Neural Networks (CNNs). Extensive experiments were conducted on popular pre-trained CNN models, including VGG16, VGG19, ResNet101, ResNet152, MobileNetV2, InceptionV3, and ConvNeXt-T to evaluate the trade-off between accuracy and approximation. The results demonstrate that ACE-CNN outperforms other configurations, offering a favorable balance between accuracy and computational efficiency. In our experiments, when applied to VGG16, SCDM8 achieves an average reduction in power consumption of 35% with a marginal decrease in accuracy of only 1.5%. Similarly, when incorporated into ResNet152, SCDM8 yields an energy saving of 42% while sacrificing only 1.8% in accuracy. ACE-CNN provides the first approximate version of ConvNeXt which yields up to 72% energy improvement at the price of less than only 1.3% Top-1 accuracy. These results highlight the suitability of SCDM8 as an approximation method across various CNN models. Our analysis shows that the ACE-CNN outperforms state-of-the-art approaches in accuracy, energy efficiency, and computation precision for image classification tasks in CNNs. Our study investigated the resiliency of CNN models to approximate multipliers, revealing that ResNet101 demonstrated the highest resiliency with an average difference in the accuracy of 0.97%, whereas LeNet5 Inspired-CNN exhibited the lowest resiliency with an average difference of 2.92%. These findings aid in selecting energy-efficient approximate multipliers for CNN-based systems, and contribute to the development of energy-efficient deep learning systems by offering an effective approximation technique for multipliers in CNNs. 
The proposed SCDM8 family of approximate multipliers opens new avenues for efficient deep learning applications, enabling significant energy savings with virtually no loss in accuracy.
% Journal article: ACE-CNN, approximate carry-disregard multipliers for CNN image classification.
% NOTE(review): the export had empty volume/number and pages 1-14, which looks like an
% early-access placeholder -- confirm the final volume, issue and page range.
% NOTE(review): this entry spells the third author "Sedigh"; entry 10235317 spells it "Sadigh" -- confirm.
@ARTICLE{10457067,
  author   = {Shakibhamedan, Salar and Amirafshar, Nima and Baroughi, Ahmad Sedigh and Shahhoseini, Hadi Shahriar and TaheriNejad, Nima},
  title    = {{ACE-CNN}: Approximate Carry Disregard Multipliers for Energy-Efficient {CNN}-Based Image Classification},
  journal  = {IEEE Transactions on Circuits and Systems I: Regular Papers},
  year     = {2024},
  pages    = {1--14},
  keywords = {Hardware;Convolutional neural networks;Image classification;Task analysis;Energy efficiency;Delays;Computer architecture;convolutional neural network;approximate multiplier;image classification},
  doi      = {10.1109/TCSI.2024.3369230},
}
This paper presents the design and development of Signed Carry Disregard Multiplier (SCDM8), a family of signed approximate multipliers tailored for integration into Convolutional Neural Networks (CNNs). Extensive experiments were conducted on popular pre-trained CNN models, including VGG16, VGG19, ResNet101, ResNet152, MobileNetV2, InceptionV3, and ConvNeXt-T to evaluate the trade-off between accuracy and approximation. The results demonstrate that ACE-CNN outperforms other configurations, offering a favorable balance between accuracy and computational efficiency. In our experiments, when applied to VGG16, SCDM8 achieves an average reduction in power consumption of 35% with a marginal decrease in accuracy of only 1.5%. Similarly, when incorporated into ResNet152, SCDM8 yields an energy saving of 42% while sacrificing only 1.8% in accuracy. ACE-CNN provides the first approximate version of ConvNeXt which yields up to 72% energy improvement at the price of less than only 1.3% Top-1 accuracy. These results highlight the suitability of SCDM8 as an approximation method across various CNN models. Our analysis shows that the ACE-CNN outperforms state-of-the-art approaches in accuracy, energy efficiency, and computation precision for image classification tasks in CNNs. Our study investigated the resiliency of CNN models to approximate multipliers, revealing that ResNet101 demonstrated the highest resiliency with an average difference in the accuracy of 0.97%, whereas LeNet5 Inspired-CNN exhibited the lowest resiliency with an average difference of 2.92%. These findings aid in selecting energy-efficient approximate multipliers for CNN-based systems, and contribute to the development of energy-efficient deep learning systems by offering an effective approximation technique for multipliers in CNNs. 
The proposed SCDM8 family of approximate multipliers opens new avenues for efficient deep learning applications, enabling significant energy savings with virtually no loss in accuracy.
% Preprint (TechRxiv): EASE, a review of runtime energy-aware approximate deep learning.
% Empty volume/number/pages fields from the export are dropped; the Unicode dash in the
% title is written as "--" so classic 8-bit BibTeX handles it.
@ARTICLE{100010665,
  author  = {Shakibhamedan, Salar and Aminifar, Amin and TaheriNejad, Nima and Jantsch, Axel},
  title   = {{EASE}: Energy Optimization through Adaptation -- A Review of Runtime Energy-Aware Approximate Deep Learning Algorithms},
  journal = {TechRxiv},
  year    = {2024},
  doi     = {10.36227/techrxiv.170723230.09169589/v1},
}
This survey provides an overview of the state-of-the-art in runtime adaptive Approximate Computing (AxC) for Deep Learning (DL) algorithms, highlighting the challenges and opportunities in the field. The survey covers a broad spectrum of applications, including medical applications, computer vision, and natural language processing. Various power-constrained platforms, such as System-on-Chips (SoCs), Application Specific Integrated Circuits (ASICs), and Field Programmable Gate Arrays (FPGAs), are explored for their utilization in implementing runtime adaptive AxC. The survey explores various techniques, such as dynamic quantization, adaptive pruning, and low-rank approximations, offering a detailed discussion of their advantages and disadvantages. Specifically, in some surveyed research works, the runtime approximation is achieved through the utilization of machine learning algorithms, with a notable emphasis on Reinforcement Learning (RL). These approaches aim to realize runtime conditions and exploit them appropriately. By providing insights into the advancements and trends in runtime adaptive AxC, this survey serves as a valuable resource for researchers and practitioners interested in this rapidly evolving area of computing. This survey conducts an in-depth investigation into the application, challenges, and scope of runtime adaptive AxC techniques, aiming to mitigate energy consumption while preserving acceptable levels of accuracy in DL models. Our primary focus lies on Convolutional Neural Networks (CNNs), with an emphasis on their application in diverse domains. In striving for comprehensiveness, the survey encompasses selected research works that extend beyond CNNs, including alternative DL models like Recurrent Neural Networks (RNNs). Our scope of applications focuses on CNNs; however, to make a comprehensive survey, we cover some surveyed research works that contain other DL models, such as RNNs.
It also highlights the importance of considering specific application requirements and available resources when choosing the appropriate technique.
% Conference paper (ISCAS 2024): RecogNoise, ML-based recognition of noisy ECG segments.
% NOTE(review): the export carried an empty pages field (dropped here) -- add the page range once known.
@INPROCEEDINGS{aminifar2024recognoise,
  author       = {Aminifar, Amin and Khooyooz, Soheil and Jahanjoo, Anice and Shakibhamedan, Salar and TaheriNejad, Nima},
  title        = {{RecogNoise}: Machine-Learning-Based Recognition of Noisy Segments in Electrocardiogram Signals},
  booktitle    = {2024 IEEE International Symposium on Circuits and Systems (ISCAS)},
  year         = {2024},
  organization = {IEEE},
}
Today, wearable technology is frequently used for continuous monitoring of physiological indicators in the healthcare domain. However, mobile-health and wearable devices are generally used in ambulatory settings, hence vulnerable to noise. This interferes with the accuracy of Machine Learning (ML) models running on such systems and their decision-making procedures. To address this issue, we first need to identify the presence of noise. In this paper, we propose RecogNoise to detect noisy segments in Electrocardiography (ECG) recordings using heartbeat detection algorithms and ML. We evaluate our approach based on the MIT-BIH arrhythmia database and three types of noise, i.e., Electrode Motion (EM), Baseline Wander (BW), and Muscle Artifact (MA), with different Signal to Noise Ratios (SNRs). We show that RecogNoise can detect noisy segments with an F1-score of 86.9% and an accuracy of 88.3%.
% Conference paper (ISCAS 2024): stress detection from wrist-worn PPG sensor data.
% NOTE(review): the export carried an empty pages field (dropped here) -- add the page range once known.
@INPROCEEDINGS{jahanjoo2024high,
  author       = {Jahanjoo, Anice and TaheriNejad, Nima and Aminifar, Amin},
  title        = {High-Accuracy Stress Detection Using Wrist-Worn {PPG} Sensors},
  booktitle    = {2024 IEEE International Symposium on Circuits and Systems (ISCAS)},
  year         = {2024},
  organization = {IEEE},
}
Stress has become a prevalent issue affecting individuals’ physical and mental well-being. Detecting stress is the first crucial step to managing it and preventing it from causing other health issues. In this paper, we present a new method to improve the performance of detecting stress, using a comfortable to wear sensor, namely Photoplethysmography (PPG), which is embedded virtually in all smartwatches. To this end, we use PPG sensor data from the publicly available wearable stress and affect detection dataset (WESAD). Using new denoising processes, segmentation methods, and key feature extraction, we achieve 95.55% accuracy in detecting stress using the Support Vector Machine (SVM) algorithm. Simplifying the process alongside improved accuracy in this paper facilitates smartphone usage as a real-time stress detection, which we plan as future work.
% Conference paper (NANOARCH 2023): stochastic computing with Van Der Corput sequences.
% NOTE(review): the citation key "inproceedings" is an export placeholder; it is kept so that
% existing citations keep resolving -- rename only together with every cite that uses it.
% The month is given as the standard three-letter macro; the empty pages field is dropped.
@INPROCEEDINGS{inproceedings,
  author    = {Shoushtari Moghadam, Mehran and Aygun, Sercan and Riahi Alam, Mohsen and Schmidt, Jonas and Najafi, M. Hassan and TaheriNejad, Nima},
  title     = {Accurate and Energy-Efficient Stochastic Computing with {Van Der Corput} Sequences},
  booktitle = {2023 ACM International Symposium on Nanoscale Architectures (NANOARCH)},
  year      = {2023},
  month     = oct,
  doi       = {10.1145/3611315.3633265},
}
In stochastic computing (SC), data is represented using random bit-streams. The efficiency and accuracy of SC systems rely heavily on the stochastic number generator (SNG), which converts data from binary to stochastic bit-streams. While previous research has shown the benefits of using low-discrepancy (LD) sequences like Sobol and Halton in the SNG, the potential of other well-known random sequences remains unexplored. This study investigates new random sequences for potential use in SC. We find that Van Der Corput (VDC) sequences hold promise as a random number generator for accurate and energy-efficient SC, exhibiting intriguing correlation properties. Our evaluation of VDC-based bit-streams includes basic SC operations (multiplication and addition) and image processing tasks like image scaling. Our experimental results demonstrate high accuracy, reduced hardware cost, and lower energy consumption compared to state-of-the-art methods.
% Conference paper (NorCAS 2023): IMPLY-based semi-serial approximate in-memristor adder.
% Empty volume/number fields from the export are dropped; the page range uses "--".
@INPROCEEDINGS{10305490,
  author    = {Seiler, Fabian and TaheriNejad, Nima},
  title     = {An {IMPLY}-based Semi-Serial Approximate In-Memristor Adder},
  booktitle = {2023 IEEE Nordic Circuits and Systems Conference (NorCAS)},
  year      = {2023},
  pages     = {1--7},
  doi       = {10.1109/NorCAS58970.2023.10305490},
}
To alleviate the Von Neumann bottleneck, new technologies and computing paradigms have been a hot topic in research and development in recent years. Memristors offer new innovative possibilities from technological and computational points of view. They can store data well and are suitable for In-Memory Computation (IMC) since they are able to perform logical operations in memory. Another emerging computing paradigm to reduce computing time and area consumption is approximate computing, which is used in error-resistant applications. Here, we propose a novel approximated full adder that uses the stateful logic Material Implication (IMPLY) in a semi-serial structure. We embed this full adder in a Ripple Carry Adder (RCA) that we then evaluate on the circuit-level. The error metrics were evaluated and compared to State-of-the-Art (SoA) IMPLY-based adders. At 8-bit our approach requires up to 29% fewer steps and up to 34% less energy compared to the exact algorithm, while the Normalized Median Error Distance (NMED) is less than 0.01 for most scenarios. The proposed adder is applied in image processing and the respective quality metrics are calculated. All of the tested approximation degrees create a satisfactory result since the Peak Signal-to-Noise Ratio (PSNR) is over 30 dB. Thanks to the proposed approach, we save more than 13.5mJ of energy in gray-scale filtering of a 684×912 8-bit image compared to the exact calculations.
% Conference paper (NorCAS 2023): approximation-aware task partitioning on the AxE MPSoC.
% Empty volume/number fields from the export are dropped; the page range uses "--".
% NOTE(review): given names are abbreviated as exported -- expand to full names if available.
@INPROCEEDINGS{10305464,
  author    = {Huemer, S. and Baroughi, A. S. and Shahhoseini, H. S. and TaheriNejad, N.},
  title     = {Approximation-aware Task Partitioning on an Approximate-Exact {MPSoC} ({AxE})},
  booktitle = {2023 IEEE Nordic Circuits and Systems Conference (NorCAS)},
  year      = {2023},
  pages     = {1--7},
  doi       = {10.1109/NorCAS58970.2023.10305464},
}
As the demand for increased performance and reduced energy consumption continues to grow, Quality of Service (QoS) adjustment approaches offer an effective way to tackle those demands. One such method, approximation, has gained popularity in recent years, facilitating faster executions as well as a smaller power consumption by providing an approximated result. The areas in which these trade-offs are acceptable are numerous, but hardware-based solutions are usually domain-specific and expensive to integrate. To tackle this issue, we take a different approach, in which approximate hardware can be used (or not) in a general purpose environment and via software decisions. That is, a Multi-Processor System-on-Chip (MPSoC) that contains Central Processing Units (CPUs) that offer approximate calculations alongside the ones that offer exact calculations. However, current task partitioning algorithms do not consider the specific capabilities or requirements of such an MPSoC. This paper introduces approximation-aware partitioning algorithms using different strategies and compares the results to the State-of-the-Art (SoA). Additionally, the resulting task partitions are executed to gauge their quality compared to the SoA. Experimental results show that the usage of an approximate CPU and approximation-aware task partitioning leads to an increased partition success rate of 21.5%. Furthermore, the execution, i.e., scheduling of the partitioned tasks until energy starvation, achieves a 3.4% extended run-time.
% Journal article (IEEE TCAS-I, vol. 70, no. 12, 2023): carry disregard approximate multipliers.
% NOTE(review): this entry spells the second author "Sadigh"; entry 10457067 spells it "Sedigh" -- confirm.
@ARTICLE{10235317,
  author   = {Amirafshar, Nima and Baroughi, Ahmad Sadigh and Shahhoseini, Hadi Shahriar and TaheriNejad, Nima},
  title    = {Carry Disregard Approximate Multipliers},
  journal  = {IEEE Transactions on Circuits and Systems I: Regular Papers},
  year     = {2023},
  volume   = {70},
  number   = {12},
  pages    = {4840--4853},
  keywords = {Delays;Computer architecture;Power demand;Adders;Approximate computing;Logic gates;Hardware;Energy efficiency;carry disregard multiplier;power-efficient;image processing},
  doi      = {10.1109/TCSI.2023.3306071},
}
Several challenges in improving the performance of computing systems have given rise to emerging computing paradigms. One of these paradigms is approximate computing. Many applications require different levels of accuracy and are error-tolerant to a certain degree. Approximate computations can reduce the calculation complexities significantly and thus improve the performance. Here, we propose a methodology for designing approximate N-bit array multipliers based on carry disregarding. We evaluate and analyze the proposed multipliers both experimentally and theoretically. The proposed 8-bit multipliers, compared to the exact multiplier, reduce the critical path delay, power consumption, and area by 29%, 29%, and 30%, on average. Compared to the existing approximate array architectures in the literature, they have improved 14.3%, 22.8%, and 26.4%, respectively. Compared to the exact 16-bit multiplier, the proposed 16-bit multipliers have reduced the delay, power consumption, and area by 35%, 24%, and 23% on average. In an image processing application, we have also demonstrated the applicability of a wide range of proposed multipliers, which have Peak Signal-to-Noise Ratio (PSNR) and Structural Similarity Index Measure (SSIM) over 30 dB and 94%, respectively.
The analysis of motor evoked potentials (MEPs) generated by transcranial magnetic stimulation (TMS) is crucial in research and clinical medical practice. MEPs are characterized by their latency and the treatment of a single patient may require the characterization of thousands of MEPs. Given the difficulty of developing reliable and accurate algorithms, currently the assessment of MEPs is performed with visual inspection and manual annotation by a medical expert; making it a time-consuming, inaccurate, and error-prone process. In this study, we developed DELMEP, a deep learning-based algorithm to automate the estimation of MEP latency. Our algorithm resulted in a mean absolute error of about 0.5 ms and an accuracy that was practically independent of the MEP amplitude. The low computational cost of the DELMEP algorithm allows employing it in on-the-fly characterization of MEPs for brain-state-dependent and closed-loop brain stimulation protocols. Moreover, its learning ability makes it a particularly promising option for artificial-intelligence-based personalized clinical applications.
In-Memory Computing (IMC) is a promising computing paradigm to accelerate Big Data applications. It reduces the data movement between memory and processing units, and provides massive parallelism. Memristive technology is one of the promising technologies for IMC. This emerging technology, however, is still in evolution, facing practical challenges. Memristive memories are prone to soft-error while storing the data and during computations. The traditional binary encoding commonly used in memristive IMC is highly sensitive to soft-errors, which makes developing reliable memristive IMC more challenging. Stochastic Computing (SC) is a re-emerging computing paradigm that is highly robust against soft-errors as any bit flip leads to only a least significant bit error. In this work, we study SC as a solution to increase the reliability of memristive IMC. We investigate how and to what extent SC may address or improve the reliability issues of current memristive technology, and memristive IMC. We also evaluate the characteristics yielded by memristive stochastic IMC and compare them with those of the traditional reliability techniques.
% Journal article (IEEE JETCAS, vol. 13, no. 1, 2023): serial IMPLY-based approximate full adders.
@ARTICLE{10032497,
  author  = {Fatemieh, Seyed Erfan and Reshadinezhad, Mohammad Reza and TaheriNejad, Nima},
  title   = {Fast and Compact Serial {IMPLY}-Based Approximate Full Adders Applied in Image Processing},
  journal = {IEEE Journal on Emerging and Selected Topics in Circuits and Systems},
  year    = {2023},
  volume  = {13},
  number  = {1},
  pages   = {175--188},
  doi     = {10.1109/JETCAS.2023.3241012},
}
The barriers to improving computers’ performance have led to the emergence of new computing paradigms and technologies. Among these, the memristors are of great concern. In addition to storing data, memristors can perform logical operations and are proper for In-Memory Computation (IMC). Furthermore, approximate computing is an emerging paradigm introduced to improve performance by reducing the accuracy of calculations in error-resistant applications. These two concepts are combined and presented in four serial Material Implication (IMPLY)-based approximate full adders. In addition to the positive features of the serial method, the proposed circuits reduce the number of calculation steps by 7%–43%, and the energy consumption improves by 56%–68% compared to the existing exact full adders. The accuracy loss of proposed circuits in different simulated scenarios combining exact and approximate adders is analyzed. Four different image processing applications are applied to ensure the proper functionality of the proposed circuits. The results indicate that in most scenarios, the quality of the images is acceptable, and the Peak Signal-to-Noise Ratio (PSNR) criterion is more than 30 dB.
% Journal article (IEEE JETCAS, vol. 13, no. 1, 2023): stochastic-computing sound source localization.
@ARTICLE{10041128,
  author  = {Schober, Peter and Estiri, Seyedeh Newsha and Aygun, Sercan and Jalilvand, Amir Hossein and Najafi, M. Hassan and TaheriNejad, Nima},
  title   = {Stochastic Computing Design and Implementation of a Sound Source Localization System},
  journal = {IEEE Journal on Emerging and Selected Topics in Circuits and Systems},
  year    = {2023},
  volume  = {13},
  number  = {1},
  pages   = {295--311},
  doi     = {10.1109/JETCAS.2023.3243604},
}
Stochastic computing (SC) is an alternative computing paradigm that processes data in the form of uniform bit-streams. SC is fault-tolerant and can compute on small, efficient circuits. However, SC is primarily used in scientific research, and its practical implementations for end-users are rare. Digital sound source localization (SSL) is a useful signal processing technique that locates speakers using multiple microphones. SC has not been integrated into SSL in practice or theory. In this work, for the first time to the best of our knowledge, we implement an SSL algorithm in the stochastic domain and develop a functional SC-based sound source localizer. The practical part of this work shows that the proposed stochastic circuit does not depend on conventional analog-to-digital conversion and can process data in the form of pulse-width-modulated (PWM) signals. The proposed SC design consumes up to 39% less area than the conventional binary design. It can also consume less power depending on the computational accuracy, for example, 6% less power consumption for 3-bit inputs. We propose a new cross-correlation (CC) design based on the state-of-the-art Sobol bit-streams for further area and power saving. The proposed design utilizes a MUX unit for bit-stream generation. It saves the area footprint up to 64% and the power consumption up to 82% compared to the counter-based SC design of CC, which relies on a comparator for bit-stream generation. The presented stochastic circuits are not limited to SSL and are readily applicable to other practical applications such as radar ranging, wireless location, sonar direction finding, beamforming, and sensor calibration. The project’s source code is made available for public access.
% Conference paper (ISVLSI 2022): energy-aware adaptive approximate computing for deep learning.
% Empty volume/number fields are dropped; the export's degenerate range 328-328 is a single page.
@INPROCEEDINGS{9911996,
  author    = {TaheriNejad, Nima and Shakibhamedan, Salar},
  title     = {Energy-aware Adaptive Approximate Computing for Deep Learning Applications},
  booktitle = {2022 IEEE Computer Society Annual Symposium on VLSI (ISVLSI)},
  year      = {2022},
  pages     = {328},
  doi       = {10.1109/ISVLSI54635.2022.00072},
}
Applications that use deep learning incur a substantial amount of energy consumption. Reducing this energy footprint is important, especially for applications such as Internet of Things (IoT) Embedded Systems (ESs), where resources are scarce. Here, we present computational self-awareness as a promising solution for intelligently adapting machine learning algorithms at runtime to reduce their energy consumption. In particular, we focus on approximation as a key enabler knob for such adaptivity. We show that the benefits of such an approach can be up to 2.5× energy savings.
% Conference paper (DSD 2022): AxE, an approximate-exact multi-processor system-on-chip platform.
% Empty volume/number fields from the export are dropped; the page range uses "--".
% NOTE(review): given names are abbreviated as exported -- expand to full names if available.
@INPROCEEDINGS{9996733,
  author    = {Baroughi, A. S. and Huemer, S. and Shahhoseini, H. S. and TaheriNejad, N.},
  title     = {{AxE}: An Approximate-Exact Multi-Processor System-on-Chip Platform},
  booktitle = {2022 25th Euromicro Conference on Digital System Design (DSD)},
  year      = {2022},
  pages     = {60--66},
  doi       = {10.1109/DSD57027.2022.00018},
}
Due to the ever-increasing complexity of computing tasks, emerging computing paradigms that increase efficiency, such as approximate computing, are gaining momentum. However, so far, the majority of proposed solutions for hardware-based approximation have been application-specific and/or limited to smaller units of the computing system and require engineering effort for integration into the rest of the system. In this paper, we present Approximate and Exact Multi-Processor system-on-chip (AxE) platform. AxE is the first general-purpose approximate Multi-Processor System-on-Chip (MPSoC). AxE is a heterogeneous RISC-V platform with exact and approximate cores that allows exploring hardware approximation for any application and using software instructions. Using the full capacity of an entire MPSoC, especially a heterogeneous one such as AxE, is an increasingly challenging problem. Therefore, we also propose a task mapping method for running exact and approximable applications on AxE. That is a mixed task mapping, in which applications are viewed as a set of tasks that can be run independently on different processors with different capabilities (exact or approximate). We evaluated our proposed method on AxE and reached a 32% average execution speed-up and 21% energy consumption saving with an average of 99.3% accuracy on three mixed workloads. We also ran a sample image processing application, namely gray-scale filter, on AxE and will present its results.
% Conference paper (DSD 2022): approximate carry disregard multiplier.
% Empty volume/number fields from the export are dropped; the page range uses "--".
% NOTE(review): given names are abbreviated as exported -- expand to full names if available.
@INPROCEEDINGS{9996856,
  author    = {Amirafshar, N. and Baroughi, A. S. and Shahhoseini, H. S. and TaheriNejad, N.},
  title     = {An Approximate Carry Disregard Multiplier with Improved Mean Relative Error Distance and Probability of Correctness},
  booktitle = {2022 25th Euromicro Conference on Digital System Design (DSD)},
  year      = {2022},
  pages     = {46--52},
  doi       = {10.1109/DSD57027.2022.00016},
}
Nowadays, a wide range of applications can tolerate certain computational errors. Hence, approximate computing has become one of the most attractive topics in computer architecture. Reducing accuracy in computations in a premeditated and appropriate manner reduces architectural complexities, and as a result, performance, power consumption, and area can improve significantly. This paper proposes a novel approximate multiplier design. The proposed design has been implemented using 45 nm CMOS technology and has been extensively evaluated. Compared to existing approximate architectures, the proposed approximate multiplier has higher accuracy. It also achieves better results in critical path delay, power consumption, and area up to 47.54 %, 75.24%, and 92.49%, respectively. Compared to the precise multipliers, our evaluations show that the critical path delay, power consumption, and area have been improved by 39%, 18%, and 6 %, respectively.
Mobile health technology is a rapidly growing field with numerous promises to make substantial impact in our lives. To open this special issue, which brings to you many exciting research results in mobile health technology, we discuss two important aspects of this technology. One is how they can be integrated in our daily lives as important care devices, especially during periods such as the more and more frequent pandemics around the world. Having discussed their advantages, we calculate their estimated footprint in the energy consumption and carbon dioxide they produce globally. With that we raise awareness and invite researchers to work on reducing their energy consumption to ensure that they maintain a low footprint even if their numbers explode in the near future. We finish this article with a brief teaser of the papers published in this special issue and wish you a good read.
% Conference paper (NEWCAS 2022): operational-conditions analysis of IMPLY and TMSL stateful logics.
% Empty volume/number fields from the export are dropped; the page range uses "--".
@INPROCEEDINGS{9841969,
  author    = {Rahimi Disfani, Roya and TaheriNejad, Nima and Valinataj, Mojtaba},
  title     = {Operational Conditions Analysis for Memristive Stateful Logics -- A Study on {IMPLY} and {TMSL}},
  booktitle = {2022 20th IEEE Interregional NEWCAS Conference (NEWCAS)},
  year      = {2022},
  pages     = {480--484},
  doi       = {10.1109/NEWCAS52662.2022.9841969},
}
Memristive technology is a promising emerging technology, which can be used as storage and processing element. Memristors are non-volatile, compact, fast, and energy efficient. They can be used in logic designs to perform basic logical operations in memory and thus avoid the von-Neumann bottleneck. Among various possibilities, stateful logics stand out, since they can process the data with minimum data movement. Material Implication (IMPLY), Memristor-Aided Logic (MAGIC), Three Memristors Stateful Logic (TMSL) and Single-cycle In-memristor XOR (SIXOR) are some of the main examples of memristor-based stateful logics. Given the maturing state of the memristive technology, in this paper, we evaluate the effect of non-idealities of this technology, especially the device variations, on the operations of stateful logic. In particular, we focus on two well-received logics, namely IMPLY and TMSL, and analyze the effect of various operational conditions and device variations on the functionality of these gates.
% Journal article (IEEE TCAS-II, vol. 69, no. 5, 2022): stochastic computing in memristive memory.
@ARTICLE{9741240,
  author  = {Riahi Alam, Mohsen and Najafi, M. Hassan and TaheriNejad, Nima and Imani, Mohsen and Gottumukkala, Raju},
  title   = {Stochastic Computing in Beyond {Von-Neumann} Era: Processing Bit-Streams in Memristive Memory},
  journal = {IEEE Transactions on Circuits and Systems II: Express Briefs},
  year    = {2022},
  volume  = {69},
  number  = {5},
  pages   = {2423--2427},
  doi     = {10.1109/TCSII.2022.3161995},
}
Stochastic Computing (SC) is an alternative computing paradigm that promises high robustness to noise and outstanding area- and power-efficiency compared to traditional binary. It also enables the design of fully parallel and scalable computations. Despite its advantage, SC suffers from long latency and high energy consumption compared to conventional binary computing, especially with current CMOS technology. The cost of conversion between binary and stochastic representation takes a significant cost with CMOS circuits. In-Memory Computation (IMC) is introduced to accelerate Big Data applications by removing the data movement between memory and processing units, and by providing massive parallelism. In this work, we explore the efforts in employing IMC for fast and energy-efficient SC system design. We specially focus on memristors as an emerging technology that promises efficient memory and computation beyond CMOS. We discuss the potentials and challenges for realizing efficient SC systems in memory.
Attention deficit hyperactivity disorder (ADHD) is a mental disorder most notable in children. The disease may affect the ability to focus and cause a physical and mental restlessness and risky behavior. Recommended treatment consists of stimulant administration and behavioral therapy. However, medicating children is problematic since there are indications that brain development is affected by ADHD medication agents. Therefore, behavioral therapy is the preferred approach in ADHD treatment for children. In order to monitor and optimize the success of such behavioral therapies, neuro-feedback methods can be used. The most notable technology used in such methods is Electroencephalography (EEG). In this article, an overview of the pathology of ADHD, EEG and its usage as a diagnostic and therapeutic tool in the context of ADHD is given. Based on that knowledge, novel EEG measurement modes, new development principles, and system on chip implementations are presented and discussed.
% DISABLED DUPLICATE: an entry with key 9841969 is already defined earlier in this file, and a
% second definition makes BibTeX report a repeated entry. The leading at-sign is removed so the
% text below is ignored by the parser; the record itself is otherwise left untouched.
% NOTE(review): the abstract just above this line (ADHD / EEG overview) has no entry of its own;
% this slot may have been meant for that article -- confirm and replace if so.
INPROCEEDINGS{9841969, author={Rahimi Disfani, Roya and TaheriNejad, Nima and Valinataj, Mojtaba}, booktitle={2022 20th IEEE Interregional NEWCAS Conference (NEWCAS)}, title={Operational Conditions Analysis for Memristive Stateful Logics - A Study on IMPLY and TMSL}, year={2022}, volume={}, number={}, pages={480-484}, doi={10.1109/NEWCAS52662.2022.9841969}}
Memristive technology is a promising emerging technology, which can be used as storage and processing element. Memristors are non-volatile, compact, fast, and energy efficient. They can be used in logic designs to perform basic logical operations in memory and thus avoid the von-Neumann bottleneck. Among various possibilities, stateful logics stand out, since they can process the data with minimum data movement. Material Implication (IMPLY), Memristor-Aided Logic (MAGIC), Three Memristors Stateful Logic (TMSL) and Single-cycle In-memristor XOR (SIXOR) are some of the main examples of memristor-based stateful logics. Given the maturing state of the memristive technology, in this paper, we evaluate the effect of non-idealities of this technology, especially the device variations, on the operations of stateful logic. In particular, we focus on two well-received logics, namely IMPLY and TMSL, and analyze the effect of various operational conditions and device variations on the functionality of these gates.
% Journal article (IEEE Sensors Journal, vol. 22, no. 4, 2022): self-aware data processing for IoT CPS power saving.
@ARTICLE{9638717,
  author  = {Hadizadeh Hafshejani, Ehsan and TaheriNejad, Nima and Rabbani, Rozhan and Azizi, Zohreh and Mohin, Shahabeddin and Fotowat-Ahmady, Ali and Mirabbasi, Shahriar},
  title   = {Self-Aware Data Processing for Power Saving in Resource-Constrained {IoT} Cyber-Physical Systems},
  journal = {IEEE Sensors Journal},
  year    = {2022},
  volume  = {22},
  number  = {4},
  pages   = {3648--3659},
  doi     = {10.1109/JSEN.2021.3133405},
}
Given the emergence of the Internet of Things (IoT) Cyber-Physical Systems (CPSs) and their omnipresence, reducing their power consumption is among the major design priorities. To reduce the power consumption of such systems, we propose the use of a signal-dependent sampling method in a bottom-up fashion, which can lead to up to a 94% reduction in the overall system power with negligible or no loss in performance. Moreover, the proposed technique provides further flexibility for self-aware CPSs to dynamically adjust the number of data samples that are needed for processing (and consequently reduce the power consumption) based on the application at hand and the desired trade-off between accuracy and power consumption. To show the merits of the proposed approach, we also present case studies in the context of an Electrocardiography (ECG) monitoring system as well as a greenhouse (temperature and relative humidity) monitoring system. We also discuss the trade-offs among the system configuration parameters, power consumption, and performance (accuracy). We show that the proposed method has a negligible overhead, which facilitates the real-time operation of the IoT CPS while achieving significant power savings (up to 94%). Even though we study the effects of using this method for two representative applications, the technique is general and can offer similar improvements for a wide range of CPSs and resource-constrained IoT systems.
@INPROCEEDINGS{9556427,
  author    = {Bauer, Friedrich and Braun, Felix and Hauer, Daniel and Jantsch, Axel and Kobelrausch, Markus D. and Mosbeck, Martin and TaheriNejad, Nima and Vogt, Philipp-Sebastian},
  title     = {{MELODI}: An Online Platform for Mass Education of Digital Design - {HDL} to Remote {FPGA}},
  booktitle = {2021 31st International Conference on Field-Programmable Logic and Applications (FPL)},
  year      = {2021},
  pages     = {399},
  doi       = {10.1109/FPL53798.2021.00084}
}
Learning and teaching digital hardware design involves significant efforts on both sides, teachers and students. Hardware Description Languages (HDLs) simplify the design process, where the created designs can be tested either in simulators or using real hardware. For the latter, Field Programmable Gate Arrays (FPGAs) play a crucial role in facilitating and speeding up the prototyping process. Mass E-Learning of design, test, and prototyping Digital hardware (MELODI) is developed for scalable teaching of HDL to a large number of students. The primary goals are to a) minimize the requirements for students and b) reduce the resources required at the university. MELODI provides a complete HDL workflow, including real remote hardware prototyping on FPGAs without the need for any tool at the students’ side.
@ARTICLE{9447982,
  author  = {Schober, Peter and Najafi, M. Hassan and TaheriNejad, Nima},
  title   = {High-Accuracy Multiply-Accumulate ({MAC}) Technique for Unary Stochastic Computing},
  journal = {IEEE Transactions on Computers},
  year    = {2022},
  volume  = {71},
  number  = {6},
  pages   = {1425--1439},
  doi     = {10.1109/TC.2021.3087027}
}
Multiply-accumulate (MAC) operations are common in data processing and machine learning but costly in terms of hardware usage. Stochastic Computing (SC) is a promising approach for low-cost hardware design of complex arithmetic operations such as multiplication. Computing with deterministic unary bit-streams (defined as bit-streams with all 1s grouped at the beginning or end of a bit-stream) has been recently suggested to improve the accuracy of SC. Conventionally, SC designs use multiplexer (mux) units or OR gates to accumulate data in the stochastic domain. MUX-based addition suffers from scaling of data and OR-based addition from inaccuracy. This work proposes a novel technique for MAC operation on unary bit-streams that allows exact, non-scaled addition of multiplication results. By introducing a relative delay between the products, we control correlation between bit-streams and eliminate OR-based addition error. We evaluate the accuracy of the proposed technique compared to the state-of-the-art MAC designs. After quantization, the proposed technique demonstrates at least 37 percent and up to 100 percent decrease of the mean absolute error for uniformly distributed random input values, compared to traditional OR-based MAC designs. Further, we demonstrate that the proposed technique is practical and evaluate area, power and energy of three possible implementations.
@ARTICLE{9425507,
  author  = {Mozelli, Amid and TaheriNejad, Nima and Jantsch, Axel},
  title   = {A Study on Confidence: An Unsupervised Multiagent Machine Learning Experiment},
  journal = {IEEE Design \& Test},
  year    = {2022},
  volume  = {39},
  number  = {3},
  pages   = {54--62},
  doi     = {10.1109/MDAT.2021.3078341}
}
Uncovering the true nature of confidence is a complex problem. This article proposes a step in this direction, designing a socially inspired experiment to further investigate the nature of confidence in the context of self-learning.
@comment{Duplicate of entry 9425507, which is already defined earlier in this file; disabled here because a repeated citation key makes BibTeX report a "repeated entry" error.}
Uncovering the true nature of confidence is a complex problem. This article proposes a step in this direction, designing a socially inspired experiment to further investigate the nature of confidence in the context of self-learning.
Emerging electronic devices are promising to drive the performance of computer systems to new heights, against the notable saturation in traditional transistor-based architectures. Among them, resistive RAM -- or ReRAM -- has attracted a lot of attention among scientists since its practical realization was reported in 2008 and numerous devices, circuits and systems, and also models have been described in the literature. However, behavioral models fail to reproduce device parameter variations and the drift of device state in the absence of a stimulus. This shortcoming substantially reduces the practical relevance of systems and circuits designed with existing models. The work at hand deals with the development of a behavioral model that integrates device parameter variation and state drift based on data collected from our measurements of real devices. As we show in this paper, BELIEVER model enables engineers to conduct more reliable and meaningful design and simulations of circuits and systems that use ReRAMs.
@ARTICLE{9382267,
  author  = {TaheriNejad, Nima},
  title   = {{SIXOR}: Single-Cycle In-Memristor {XOR}},
  journal = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
  year    = {2021},
  volume  = {29},
  number  = {5},
  pages   = {925--935},
  doi     = {10.1109/TVLSI.2021.3062293}
}
With the fast approach of the end of silicon scaling and existing problems, such as the Von-Neumann bottleneck, alternative computing paradigms are in demand. In-memory computation (IMC) is one of the most promising solutions, and memristive technology is one of the best platforms for that purpose. Many logic families have been proposed to enable memristive IMC, among which stateful logic family stands out due to its minimal power consumption and simplicity. In this work, to complement existing works, we propose the first stateful crossbar-compatible XOR atomic logic operation that requires only one cycle for its completion, which is two times faster than the current minimum required time for performing XOR (which is two cycles) using other atomic operations in comparable memristive stateful logic families. We show that, in an example case of an adder, by taking advantage of the proposed single-cycle in-memristor XOR (SIXOR), up to 4.5× speedup can be achieved compared to other SoA stateful adders. The gained speed-up scales up in more complex systems and calculations that use XOR.
@INPROCEEDINGS{9401660,
  author    = {Ossimitz, Christoph and TaheriNejad, Nima},
  title     = {A Fast Line Segment Detector Using Approximate Computing},
  booktitle = {2021 IEEE International Symposium on Circuits and Systems (ISCAS)},
  year      = {2021},
  pages     = {1--5},
  doi       = {10.1109/ISCAS51556.2021.9401660}
}
The Line Segment Detector (LSD) algorithm is an underlying step of many image processing systems. Hence, its performance has a significant impact on the upper layers using the detected line segments for various purposes. In this paper, we propose a fast LSD algorithm. This method approximates several floating point operations, including the logarithmic Gamma function, by a series of lookup table searches. Due to the simplicity of such approximation (lookup table search) compared to the naïve implementation (calculation-based), this method is considerably faster. The proposed method has implications on reduction of the necessary efforts to implement and enhancement of the performance of the LSD hardware accelerators. Our experiments show that the proposed method reduces the run-time of the algorithm by 13% on average, with no considerable quality loss in the detection results. This improvement further propagates through other image processing algorithms using LSD.
Currently, memristor devices suffer from variability between devices and from cycle to cycle. In this work, we study the impact of device variations on memristive Material Implication (IMPLY). New constraints for different parameters and variables are analytically derived and compared to extensive simulation results, covering single gate and 1T1R crossbar structures. We show that a static analysis based on switching conditions is not sufficient for an overall assessment of robustness against device variability. Furthermore, we outline parameter ranges within which the IMPLY gate is predicted to produce correct output values. Our study shows that threshold voltage is the most critical parameter. This work helps scientists and engineers to understand the pitfalls of designing reliable IMPLY-based calculation units better and design them with more ease. Moreover, these analyses can be used to determine whether a certain memristor technology is suitable for implementation of IMPLY-based circuits and systems.
@ARTICLE{9321508,
  author  = {Alam, Mohsen Riahi and Najafi, M. Hassan and TaheriNejad, Nima},
  title   = {Exact Stochastic Computing Multiplication in Memristive Memory},
  journal = {IEEE Design \& Test},
  year    = {2021},
  volume  = {38},
  number  = {6},
  pages   = {36--43},
  doi     = {10.1109/MDAT.2021.3051296}
}
Editor’s notes: This article focuses on memristive nano-devices and their use for stochastic computing (SC). It demonstrates how to convert numbers between binary and stochastic domains and how to perform multiplications using in-memory computations by the memristive logic family “MAGIC.” In contrast to earlier works on memristive SC, the authors do not harness the intrinsic stochasticity of memristive devices but rather create deterministic SNs using well-defined operations. — Weikang Qian, Shanghai Jiao Tong University
Smartphone users require high Battery Cycle Life (BCL) and high Quality of Experience (QoE) during their usage. These two objectives can be conflicting based on the user preference at run-time. Finding the best trade-off between QoE and BCL requires an intelligent resource management approach that considers and learns user preference at run-time. Current approaches focus on one of these two objectives and neglect the other, limiting their efficiency in meeting users’ needs. In this article, we present UBAR, User- and Battery-aware Resource management, which considers dynamic workload, user preference, and user plug-in/out pattern at run-time to provide a suitable trade-off between BCL and QoE. UBAR personalizes this trade-off by learning the user’s habits and using that to satisfy QoE, while considering battery temperature and State of Charge (SOC) pattern to maximize BCL. The evaluation results show that UBAR achieves 10% to 40% improvement compared to the existing state-of-the-art approaches.
Researchers commonly use continuous noninvasive blood-pressure measurement (cNIBP) based on photoplethysmography (PPG) signals to monitor blood pressure conveniently. However, the performance of the system still needs to be improved. Accuracy and precision in blood-pressure measurements are critical factors in diagnosing and managing patients’ health conditions. Therefore, we propose a convolutional long short-term memory neural network (CNN–LSTM) with grid search ability, which provides a robust blood-pressure estimation system by extracting meaningful information from PPG signals and reducing the complexity of hyperparameter optimization in the proposed model. The multiparameter intelligent monitoring for intensive care III (MIMIC III) dataset obtained PPG and arterial-blood-pressure (ABP) signals. We obtained 75,226 signal segments, with 60,180 signals allocated for training data, 12,030 signals allocated for the validation set, and 15,045 signals allocated for the test data. During training, we applied five-fold cross-validation with a grid-search method to select the best model and determine the optimal hyperparameter settings. The optimized configuration of the CNN–LSTM layers consisted of five convolutional layers, one long short-term memory (LSTM) layer, and two fully connected layers for blood-pressure estimation. This study successfully achieved good accuracy in assessing both systolic blood pressure (SBP) and diastolic blood pressure (DBP) by calculating the standard deviation (SD) and the mean absolute error (MAE), resulting in values of 7.89 ± 3.79 and 5.34 ± 2.89 mmHg, respectively. The optimal configuration of the CNN–LSTM provided satisfactory performance according to the standards set by the British Hypertension Society (BHS), the Association for the Advancement of Medical Instrumentation (AAMI), and the Institute of Electrical and Electronics Engineers (IEEE) for blood-pressure monitoring devices.
@ARTICLE{9194997,
  author  = {Hadizadeh Hafshejani, Ehsan and Elmi, Mohammad and TaheriNejad, Nima and Fotowat-Ahmady, Ali and Mirabbasi, Shahriar},
  title   = {A Low-Power Signal-Dependent Sampling Technique: Analysis, Implementation, and Applications},
  journal = {IEEE Transactions on Circuits and Systems I: Regular Papers},
  year    = {2020},
  volume  = {67},
  number  = {12},
  pages   = {4334--4347},
  doi     = {10.1109/TCSI.2020.3021290}
}
Sensors are among essential building blocks of any Cyber-Physical Systems (CPSs). Acquisition and processing of their sensory data contribute to the power consumption and computation load of the overall CPSs. For data acquisition, the conventional fixed frequency sampling in many such systems is sub-optimal since a sizable number of samples do not contain important information. In this work, we propose a Signal-Dependent Sampling (SDS) method and present its associated circuit implementation. Using the proposed SDS method, the number of retained samples is significantly reduced with little or negligible compromise in the quality of the (reconstructed) signal. The associated error and added noise are analyzed and their boundaries - which can be controlled by the user - are calculated. Our experiments show that the proposed system is able to improve power efficiency of the overall system in various applications. For example, for wireless Electrocardiography (ECG), Photoplethysmography (PPG), and Electroencephalogram (EEG) monitoring systems, the proposed approach can achieve a power saving of 81%, 76%, and 64% respectively. The proof-of-concept prototype system is implemented using TSMC 0.18μm and has a foot-print and power consumption that compare favorably with those of the state-of-the-art implementations. The method can be used in a variety of applications including wireless sensor networks, mobile and wearable devices, as well as Internet of Things (IoT) nodes.
@ARTICLE{9195878,
  author  = {TaheriNejad, Nima and Herkersdorf, Andreas and Jantsch, Axel},
  title   = {Autonomous Systems, Trust, and Guarantees},
  journal = {IEEE Design \& Test},
  year    = {2022},
  volume  = {39},
  number  = {1},
  pages   = {42--48},
  doi     = {10.1109/MDAT.2020.3024145}
}
Editor’s notes: Trustworthiness is key for the acceptance of autonomous systems. The authors advocate deterministic methods with hard-bounded corridors for operational parameters to guarantee dependable autonomy considering both functional and extra-functional properties. — Selma Saidi, TU Dortmund
@ARTICLE{9151960,
  author  = {G{\"o}tzinger, Maximilian and Juh{\'a}sz, D{\'a}vid and TaheriNejad, Nima and Willegger, Edwin and Tutzer, Benedikt and Liljeberg, Pasi and Jantsch, Axel and Rahmani, Amir M.},
  title   = {{RoSA}: A Framework for Modeling Self-Awareness in Cyber-Physical Systems},
  journal = {IEEE Access},
  year    = {2020},
  volume  = {8},
  pages   = {141373--141394},
  doi     = {10.1109/ACCESS.2020.3012824}
}
The role of smart and autonomous systems is becoming vital in many areas of industry and society. Expectations from such systems continuously rise and become more ambitious: long lifetime, high reliability, high performance, energy efficiency, and adaptability, particularly in the presence of changing environments. Computational self-awareness promises a comprehensive assessment of the system state for sensible and well-informed actions and resource management. Computational self-awareness concepts can be used in many applications such as automated manufacturing plants, telecommunication systems, autonomous driving, traffic control, smart grids, and wearable health monitoring systems. Developing self-aware systems from scratch for each application is the most common practice currently, but this is highly redundant, inefficient, and uneconomic. Hence, we propose a framework that supports modeling and evaluation of various self-aware concepts in hierarchical agent systems, where agents are made up of self-aware functionalities. This paper presents the Research on Self-Awareness (RoSA) framework and its design principles. In addition, self-aware functionalities abstraction, data reliability, and confidence, which are currently provided by RoSA, are described. Potential use cases of RoSA are discussed. Capabilities of the proposed framework are showcased by case studies from the fields of healthcare and industrial monitoring. We believe that RoSA is capable of serving as a common framework for self-aware modeling and applications and thus helps researchers and engineers in exploring the vast design space of hierarchical agent-based systems with computational self-awareness.
@comment{Duplicate of entry 9195878, which is already defined earlier in this file; disabled here because a repeated citation key makes BibTeX report a "repeated entry" error.}
Editor’s notes: Trustworthiness is key for the acceptance of autonomous systems. The authors advocate deterministic methods with hard-bounded corridors for operational parameters to guarantee dependable autonomy considering both functional and extra-functional properties. — Selma Saidi, TU Dortmund
@ARTICLE{8967013,
  author  = {Radakovits, David and TaheriNejad, Nima and Cai, Mengye and Delaroche, Th{\'e}ophile and Mirabbasi, Shahriar},
  title   = {A Memristive Multiplier Using Semi-Serial {IMPLY}-Based Adder},
  journal = {IEEE Transactions on Circuits and Systems I: Regular Papers},
  year    = {2020},
  volume  = {67},
  number  = {5},
  pages   = {1495--1506},
  doi     = {10.1109/TCSI.2020.2965935}
}
Memristors are among emerging technologies with many promising features, which makes them suitable not only for storage purposes but also for computations. In this work, focusing on in-memory computations, we first present our semi-serial IMPLY-based adder and perform an extensive analysis of its merits. In addition to providing a favorable balance between the number of steps and number of memristors, a key property of the presented adder is its compactness as compared to the state-of-the-art adders. Next, using our semi-serial adder, we propose an IMPLY-based multiplier. We show that the proposed multiplier is more than 5× better than other works based on the figure of merit which gives equal weight to the number of steps (i.e., speed) and required die area. Additionally, we provide a deeper insight into IMPLY-based arithmetic units, their properties, design characteristics, and advantages or disadvantages compared to one another by proposing new figures of merit and performing comprehensive comparative analyses. This facilitates the process of design, or selection, of suitable units for the design engineers and researchers in the field.
@INPROCEEDINGS{9180743,
  author    = {Alam, Mohsen Riahi and Najafi, M. Hassan and TaheriNejad, Nima},
  title     = {Exact In-Memory Multiplication Based on Deterministic Stochastic Computing},
  booktitle = {2020 IEEE International Symposium on Circuits and Systems (ISCAS)},
  year      = {2020},
  pages     = {1--5},
  doi       = {10.1109/ISCAS45731.2020.9180743}
}
Memristors offer the ability to both store and process data in memory, eliminating the overhead of data transfer between memory and processing unit. For data-intensive applications, developing efficient in-memory computing methods is under investigation. Stochastic computing (SC), a paradigm offering simple execution of complex operations, has been used for reliable and efficient multiplication of data in-memory. Current SC-based in-memory methods are incapable of producing accurate results. This work, to the best of our knowledge, develops the first accurate SC-based in-memory multiplier. For logical operations, we use Memristor-Aided Logic (MAGIC), and to generate bit-streams, we propose a novel method, which takes advantage of the intrinsic properties of memristors. The proposed design improves the speed and reduces the memory usage and energy consumption compared to the State-of-the-Art (SoA) accurate in-memory fixed-point and off-memory SC multipliers.
In this article, we make the case for the new class of Self-aware Cyber-physical Systems. By bringing together the two established fields of cyber-physical systems and self-aware computing, we aim at creating systems with strongly increased yet managed autonomy, which is a main requirement for many emerging and future applications and technologies. Self-aware cyber-physical systems are situated in a physical environment and constrained in their resources, and they understand their own state and environment and, based on that understanding, are able to make decisions autonomously at runtime in a self-explanatory way. In an attempt to lay out a research agenda, we bring up and elaborate on five key challenges for future self-aware cyber-physical systems: (i) How can we build resource-sensitive yet self-aware systems? (ii) How to acknowledge situatedness and subjectivity? (iii) What are effective infrastructures for implementing self-awareness processes? (iv) How can we verify self-aware cyber-physical systems and, in particular, which guarantees can we give? (v) What novel development processes will be required to engineer self-aware cyber-physical systems? We review each of these challenges in some detail and emphasize that addressing all of them requires the system to make a comprehensive assessment of the situation and a continual introspection of its own state to sensibly balance diverse requirements, constraints, short-term and long-term objectives. Throughout, we draw on three examples of cyber-physical systems that may benefit from self-awareness: a multi-processor system-on-chip, a Mars rover, and an implanted insulin pump. These three very different systems nevertheless have similar characteristics: limited resources, complex unforeseeable environmental dynamics, high expectations on their reliability, and substantial levels of risk associated with malfunctioning. 
Using these examples, we discuss the potential role of self-awareness in both highly complex and rather more simple systems, and as a main conclusion we highlight the need for research on above listed topics.
@INPROCEEDINGS{9068993,
  author    = {Pollreisz, David and TaheriNejad, Nima},
  title     = {Reliable Respiratory Rate Extraction using {PPG}},
  booktitle = {2020 IEEE 11th Latin American Symposium on Circuits \& Systems (LASCAS)},
  year      = {2020},
  pages     = {1--4},
  doi       = {10.1109/LASCAS45839.2020.9068993}
}
Wearable electronics enable a new look into the health of individuals in a fashion that was never possible before. However, many reliable methods for measuring Respiratory Rate (RR) require wearing gadgets that are impractical in a normal daily life setup. On the other hand, more practical methods, which are less intrusive, are often less reliable. Extracting RR using Photoplethysmogram (PPG) signals is one of the methods in the latter group. A major challenge for this method is the movement artifact, which leads to wrong estimation of RR or failure in its calculation. In this work, we propose a new algorithm, Smart Fusion of Frequency Domain Peak (SFFDP), that outperforms existing algorithms by at least 37% in terms of reliability; i.e., average error, Standard Deviation (STD), and Figure of Merit (FoM). This method does not require any signal other than PPG. Therefore, it can be used in a wide range of wearable devices, such as smart watches, without any hardware additions.
@INPROCEEDINGS{9105643,
  author    = {Shamsa, Elham and Kanduri, Anil and TaheriNejad, Nima and Pr{\"o}bstl, Alma and Chakraborty, Samarjit and Rahmani, Amir M. and Liljeberg, Pasi},
  title     = {User-centric Resource Management for Embedded Multi-core Processors},
  booktitle = {2020 33rd International Conference on VLSI Design and 2020 19th International Conference on Embedded Systems (VLSID)},
  year      = {2020},
  pages     = {43--48},
  doi       = {10.1109/VLSID49098.2020.00025}
}
Modern battery powered Embedded Systems (ES) must provide a high performance with minimal energy consumption to enhance the user experience. However, these two are often conflicting objectives. In current ES resource management techniques, user behavior and preferences are only indirectly or not at all considered. In this paper, we present a novel user- and battery-aware resource management framework for multi-processor architectures that considers these conflicting requirements and dynamic unknown workloads at run-time to maximize user satisfaction. Proposed technique learns user’s habits to dynamically adjust the resource management schemes based on the data it collects regarding user’s plug-in behavior, battery charge status, and workloads variability at run-time. This information is used to improve the balance between performance and energy consumption, and thus optimize the Quality of Experience (QoE). Our evaluation results show that our framework enhances the user experience by 22% in comparison with the existing state-of-the-art.
@ARTICLE{8832255,
  author  = {Ganjeheizadeh Rohani, Shokat and TaheriNejad, Nima and Radakovits, David},
  title   = {A Semiparallel Full-Adder in {IMPLY} Logic},
  journal = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
  year    = {2020},
  volume  = {28},
  number  = {1},
  pages   = {297--301},
  doi     = {10.1109/TVLSI.2019.2936873}
}
Passive implementation of memristors has led to several innovative works in the field of electronics. Despite being primarily a candidate for memory applications, memristors have proven to be beneficial in several other circuits and applications as well. One of the use cases is the implementation of digital circuits such as adders. Among several logic implementations using memristors, IMPLY logic is one of the promising candidates. In this brief, we present a new architecture for a digital full-adder, which is up to 41% faster than existing IMPLY-based serial designs while requiring up to 78% less area (memristors) compared to the existing parallel design.
Cardiovascular diseases are one of the world's major causes of loss of life. The vital signs of a patient can indicate this up to 24 hours before such an incident happens. Healthcare professionals use Early Warning Score (EWS) as a common tool in healthcare facilities to indicate the health status of a patient. However, the chance of survival of an outpatient could be increased if a mobile EWS system would monitor them during their daily activities to be able to alert in case of danger. Because of limited healthcare professional supervision of this health condition assessment, a mobile EWS system needs to have an acceptable level of reliability even if errors occur in the monitoring setup such as noisy signals and detached sensors. In earlier works, a data reliability validation technique has been presented that gives information about the trustfulness of the calculated EWS. In this paper, we propose an EWS system enhanced with the self-aware property confidence, which is based on fuzzy logic. In our experiments, we demonstrate that—under adverse monitoring circumstances (such as noisy signals, detached sensors, and non-nominal monitoring conditions)—our proposed Self-Aware Early Warning Score (SA-EWS) system provides a more reliable EWS than an EWS system without self-aware properties.
@ARTICLE{8873419,
  author  = {TaheriNejad, Nima and Radakovits, David},
  title   = {From Behavioral Design of Memristive Circuits and Systems to Physical Implementations},
  journal = {IEEE Circuits and Systems Magazine},
  year    = {2019},
  volume  = {19},
  number  = {4},
  pages   = {6--18},
  doi     = {10.1109/MCAS.2019.2945209}
}
Since Hewlett Packard (HP) announced the passive fabrication of their memristors, various memristive technologies—as a promising emerging technology—have gained ever-increasing attention from the researchers. Although a natural application is using them as memory units, there have been several works in the literature showing their utilization in circuits and systems. While research on various aspects of memristive circuits and systems has been proliferating, the majority of these works are based on simulations at different levels of modeling abstraction. Simulation is a very helpful design tool, and there have been several efforts in modeling memristors; however, we contend that at this point these simulations represent the reality of the behavior of memristors, especially in a circuit or system set-up, only to a very limited extent. We show how this negatively affects the reproduction of designed circuits and systems in different simulation levels, and more importantly in a real-world set-up with physical implementation. Following that, we look into some considerations which can improve the reproducibility of the circuits and systems to be designed in the future. We conclude the paper by suggesting certain approaches to tackle these practical challenges at device level as well as circuit and system level.
With the rise of wearable devices, which integrate myriad of health-care and fitness procedures into daily life, a reliable method for measuring various bio-signals in a daily setup is more desired than ever. Many of these physiological parameters, such as Heart rate (HR) and Respiratory Rate (RR), are extracted indirectly and using other signals such as Photoplethysmograph (PPG). Part of the reason is that in some cases, such as RR measurements, the devices which directly measure them are cumbersome to wear and thus, rather impractical. On the other hand, signals, such as PPG from which the RR can be extracted, are not very clean. This poses a challenge on reliable extraction of these metrics. The most important problem is that they are corrupted by motion artifacts. In this paper, we review the state of the art algorithms which are used to detect and filter motion artifacts in PPG signals and compare them in terms of their performance. The insight provided by this paper can help the scientists and engineers to obtain a better understanding of the field and be able to use the most suitable technique for their work, or come up with innovative solutions based on existing ones.
@INPROCEEDINGS{8791942,
  author    = {TaheriNejad, Nima and Lewis, Peter and Jantsch, Axel and Rahmani, Amir and Esterle, Lukas},
  title     = {Resource Constrained Self-Aware Cyber-Physical Systems (Tutorial)},
  booktitle = {2019 IEEE 4th International Workshops on Foundations and Applications of Self* Systems (FAS*W)},
  year      = {2019},
  pages     = {259--260},
  doi       = {10.1109/FAS-W.2019.00071}
}
The overlap of the two established fields of cyber-physical systems and self-aware computing systems constitutes a challenging class of systems that require autonomy and must satisfy multiple, possibly conflicting constraints (e.g., performance, timeliness, energy, reliability). Self-aware cyber-physical systems are situated in dynamic physical environments and constrained in their resources, they understand their own state and that of their environment. Based on that understanding, they are able to make appropriate decisions autonomously at runtime with high efficiency. In this tutorial, we will review the state of the art of this exciting domain.
@INPROCEEDINGS{8961312,
  author    = {TaheriNejad, Nima and Delaroche, Th{\'e}ophile and Radakovits, David and Mirabbasi, Shahriar},
  title     = {A Semi-Serial Topology for Compact and Fast {IMPLY}-based Memristive Full Adders},
  booktitle = {2019 17th IEEE International New Circuits and Systems Conference (NEWCAS)},
  year      = {2019},
  pages     = {1--4},
  doi       = {10.1109/NEWCAS44328.2019.8961312}
}
Memristive systems are among the emerging technologies that hold a great promise. They are compact, CMOS compatible, easy to fabricate and can serve for storage as well as computation purposes. Adders are one of the most basic and critical building blocks of any computing system. One of the main application areas of memristors is in Material Implication (IMPLY) based logic. IMPLY-based adders are implemented either in serial, which has a compact implementation but needs many steps for calculation, or in parallel, which is fast, however, requires a large number of memristors. In this paper we propose an IMPLY-based adder topology and its respective addition algorithm which is 54-to-65% faster than serial adders and requires 46-to-76% less memristors than parallel adders. This topology is a favorable candidate for applications where neither speed, nor cost (i.e., area or number of memristors) could be compromised to gain the required performance.
@INPROCEEDINGS{8961248, author={Khorami, Ata and Saeidi, Roghayeh and Sharifkhani, Mohammad and Taherinejad, Nima}, booktitle={2019 17th IEEE International New Circuits and Systems Conference (NEWCAS)}, title={An Ultra Low-power Low-offset Double-tail Comparator}, year={2019}, pages={1--4}, doi={10.1109/NEWCAS44328.2019.8961248}}
In double tail comparators, the pre-amplifier amplifies the input differential voltage and when the output V_cm of the pre-amplifier becomes larger than V_th of the latch input transistors, the latch is activated and finalizes the comparison. As a result, the pre-amplification delay is fixed to a value and cannot be set at the minimum required delay, to save power and improve offset. In fact, when the latch is activated the pre-amplifier output differential voltage is still growing but the latch finishes the comparison before the maximum differential gain is formed and applied to the latch. In this paper, a comparator is proposed in which the preamplifier is turned off when the maximum gain is achieved so that always the maximum possible gain is applied to the latch. Therefore, not only the input referred offset is improved but also the power consumption of the pre-amplifier is saved. Simulations in 0.18μm technology show with an appropriate pre-amplification delay the average power is saved by up to 75% while the offset voltage is reduced by about 30%.
With improvements in electronics and mechanics, robots have become more compact as well as more space and energy efficient. Hence, they are now a more integral part of our everyday lives. Thanks to Artificial Intelligence (AI) they are on the verge of entering our social lives too. Following this trend, Technische Universität Kiwi (TUK) is a family of social robots developed and under further development at the Institute of Computer Technology at TU Wien. The project deals with the design and creation of a companion robot. The main purpose of this work is to realize a relatable robot which can eventually serve in therapeutic applications, in particular for the children on the autism spectrum. To this end, the companion robot should be able to interact with the user and express emotions. The goal of the companion robot is to create a safe environment by serving as a safety blanket, in particular where other aids such as therapeutic pets cannot be used. Ultimately, we hope that by collecting helpful data, the companion robot can contribute to the therapy procedures as well as improvement of daily life interactions with family and friends. In this paper, we present Carl Friedrich, the first of TUK family.
@INPROCEEDINGS{8861788, author={Radakovits, David and TaheriNejad, Nima}, booktitle={2019 IEEE Canadian Conference of Electrical and Computer Engineering (CCECE)}, title={Implementation and Characterization of a Memristive Memory System}, year={2019}, pages={1--4}, doi={10.1109/CCECE.2019.8861788}}
Memristors are one of the promising emerging technologies to address several challenges faced by the computing system of the day. However, a sizeable portion of the works in the literature are not supported by practical implementations or their details are kept as trade secrets. In this work, we propose and implement a writing and reading circuit for a memristive memory system and present our measurement results. A key feature of the proposed system is that it does not need any read-out compensation and virtually no refreshing (due to readout). However, we observed that by the passage of the time (and irrespective of not applying any inputs) some information loss happens, which necessitates refreshing and dictates its frequency. We associate this phenomenon, which has not been reported in the literature before, to what we call “leakage current”. We anticipate this paper to be a starting point for seeing more implementation-based works in the literature, modeling the leakage current phenomenon, and incorporating such design and consideration into the design process of memristive systems.
@INPROCEEDINGS{8861962, author={TaheriNejad, Nima and Jantsch, Axel}, booktitle={2019 IEEE Canadian Conference of Electrical and Computer Engineering (CCECE)}, title={Improved Machine Learning using Confidence}, year={2019}, pages={1--5}, doi={10.1109/CCECE.2019.8861962}}
Wearable gadgets are in for an exponential rise thanks to the improvements in the silicon scaling and ubiquity of Internet as well as battery technology and sensor amelioration. However, despite these advances, wearable gadgets remain resource constrained devices requiring further improvements in all those areas. Self-awareness enables a system to adjust its behaviors to enhance the operations of the system and meet its goals. In this paper, we review one of the self-awareness techniques used in wearable devices and machine learning, namely confidence, which leads to their improvements. In particular, we focus on how confidence helps to maintain or enhance performance of machine learning techniques while reducing the complexity of the processes and required resources for running them on resource constrained devices. We look into three examples, epilepsy monitoring, iris flower detection, and image classification.
@INPROCEEDINGS{8591186, author={Götzinger, Maximilian and Willegger, E. and TaheriNejad, Nima and Jantsch, Axel and Sauter, Thilo and Glatzl, T. and Liljeberg, Pasi}, booktitle={IECON 2018 - 44th Annual Conference of the IEEE Industrial Electronics Society}, title={Applicability of Context-Aware Health Monitoring to Hydraulic Circuits}, year={2018}, pages={4712--4719}, doi={10.1109/IECON.2018.8591186}}
Monitoring is an important aspect of operation and maintenance in virtually every industrial system. However, the extent and methods of monitoring vastly vary in different systems, from fully automated to fully manual. One of the challenges of automated monitoring is the tediousness of, and the extent of engineering time and effort required to develop necessary models or machine learning algorithms for the units to be monitored. Model-free monitoring, on the other hand, can save resources and efforts substantially. However, more often than not they have a very limited scope and application. Such a system is needed, for example, to monitor entire Heating, Ventilation and Air Conditioning (HVAC) systems, consisting of different types of sensors such as temperature, pressure, humidity or flow sensors. Recently, we proposed the Context-Aware Health Monitoring (CAH) system for model-free monitoring of any injective-function black-box, and it was tested successfully on an AC motor. In this paper, we evaluate the CAH system for an entirely different industrial use-case, that is, a hydraulic circuit. The results show the potential for considerable benefits in monitoring HVAC systems. Moreover, in the light of applying CAH to different use-cases which may potentially need a different setup of parameters, we performed a sensitivity analysis on the values of different parameters in the system. The results show the robustness of CAH with regard to the values of these parameters.
Factories in Industry 4.0 are growing in complexity due to the incorporation of a large number of Cyber-Physical Systems (CPSs) which are logically and often physically distributed. Traditional monolithic control and monitoring structures are not able to address the increasing requirements regarding flexibility, operational time, and efficiency as well as resilience. Self-Aware health Monitoring and Bio-inspired coordination for distributed Automation systems (SAMBA) is a cognitive application architecture which processes information from the factory floor and interacts with the Manufacturing Execution System (MES) to enable automated control and supervision of decentralized CPSs. The proposed architecture increases the ability of the system to ensure the quality of the process by intelligently adapting to rapidly changing environments and conditions.
@INPROCEEDINGS{8350992, author={Kholerdi, Hedyeh A. and TaheriNejad, Nima and Jantsch, Axel}, booktitle={2018 IEEE International Symposium on Circuits and Systems (ISCAS)}, title={Enhancement of Classification of Small Data Sets Using Self-awareness — An Iris Flower Case-Study}, year={2018}, pages={1--5}, doi={10.1109/ISCAS.2018.8350992}}
In big-data (Deep) Neural Network (NN) algorithm is often used for classification. However, such a massive mine of data is not always available and a shortage of training data can significantly deteriorate the performance of NNs and other classifiers. Therefore, we propose a self-aware multiple classifier system suitable for “Small-Data” cases. This algorithm uses self-awareness to switch between classifiers to improve its performance. We tested the algorithm for the classification of iris flower species using the Iris standard database. Compared to NN, our algorithm showed up to 17% classification success rate improvement with up to 10 times smaller standard deviation.
@INPROCEEDINGS{8357315, author={Jantsch, Axel and Anzanpour, Arman and Kholerdi, Hedyeh and Azimi, Iman and Siafara, Lydia C. and Rahmani, Amir M. and TaheriNejad, Nima and Liljeberg, Pasi and Dutt, Nikil}, booktitle={2018 19th International Symposium on Quality Electronic Design (ISQED)}, title={Hierarchical dynamic goal management for {IoT} systems}, year={2018}, pages={370--375}, doi={10.1109/ISQED.2018.8357315}}
As the Internet of Things (IoT) penetrates ever more application domains, many IoT-based systems are increasingly becoming more complex, versatile and resource-rich, and need to serve one or more applications with diverse and changing goals. These systems face new challenges in dynamic goal management due to a combination of limited shared resources, and multiple goals that may not only conflict with each other, but which may also change dynamically. We motivate the need for hierarchical, dynamic goal management for this class of complex IoT systems and substantiate our arguments with case studies from two application domains: patient health monitoring and Cyber-Physical Production Systems (CPPSs).
Early Warning Score (EWS) systems are a common practice in hospitals. Health-care professionals use them to measure and predict amelioration or deterioration of patients’ health status. However, it is desired to monitor EWS of many patients in everyday settings and outside the hospitals as well. For portable EWS devices, which monitor patients outside a hospital, it is important to have an acceptable level of reliability. In an earlier work, we presented a self-aware modified EWS system that adaptively corrects the EWS in the case of faulty or noisy input data. In this paper, we propose an enhancement of such data reliability validation through deploying a hierarchical agent-based system that classifies data reliability but using Fuzzy logic instead of conventional Boolean values. In our experiments, we demonstrate how our reliability enhancement method can offer a more accurate and more robust EWS monitoring system.
@INPROCEEDINGS{8216594, author={Siafara, Lydia C. and Kholerdi, Hedyeh A. and Bratukhin, Aleksey and TaheriNejad, Nima and Wendt, Alexander and Jantsch, Axel and Treytl, Albert and Sauter, Thilo}, booktitle={IECON 2017 - 43rd Annual Conference of the IEEE Industrial Electronics Society}, title={{SAMBA}: A self-aware health monitoring architecture for distributed industrial systems}, year={2017}, pages={3512--3517}, doi={10.1109/IECON.2017.8216594}}
In the context of Industry 4.0, constantly evolving shop floors generate the need for a highly adaptive and autonomous automation system with lean maintenance, minimum downtime, maximum reliability, and resilience. Future Manufacturing Execution Systems (MESs) will be more complex and dynamic as well as distributed physically and logically. This makes it very difficult, if not impossible, for the conventional centralized architectures to effectively control these vibrant Cyber-Physical Production Systems (CPPSs). To address these issues, we propose Self-Aware health Monitoring and Bio-inspired coordination for distributed Automation systems (SAMBA), an architecture which tackles these challenges. SAMBA increases the ability of the system to intelligently adapt to rapidly changing environment and conditions of future CPPSs.
@INPROCEEDINGS{8037328, author={Pollreisz, David and TaheriNejad, Nima}, booktitle={2017 39th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)}, title={A simple algorithm for emotion recognition, using physiological signals of a smart watch}, year={2017}, pages={2353--2356}, doi={10.1109/EMBC.2017.8037328}}
Recently, it has become easier and more common to measure physiological signals through wearable devices such as smart watches. Extracting emotional states of individuals with problems expressing it, such as autistic individuals, can help their parents, friends, and therapists to obtain a better understanding of what they feel throughout their day. Although emotion recognition methods based on physiological signals have been studied for many years, there is a smaller body of literature about systems working with data obtained from wearable devices. In this paper, we present an emotion recognition system with a small footprint suitable for limited resources of wearable devices. Other than identifying the emotions (with a success rate of 65%), the proposed system also tags each recognition with a confidence value (on average 57%).
@INPROCEEDINGS{8010110, author={TaheriNejad, Nima and Shami, M. Ali and Manoj, P. D. Sai}, booktitle={2017 15th IEEE International New Circuits and Systems Conference (NEWCAS)}, title={Self-aware sensing and attention-based data collection in Multi-Processor System-on-Chips}, year={2017}, pages={81--84}, doi={10.1109/NEWCAS.2017.8010110}}
Self-awareness is the foundation for many of the nowadays desired system characteristics, such as self-optimization and self-adaption. This awareness is rooted in observation and sensory data obtained by the system regarding itself and its environment. Given the important role which data collection plays in creating this awareness, we believe that it merits more attention than it has so far received. For example, increasing the amount of collected data can overload the system with increased computational cost, communication load, and power consumption. Self-awareness can help the system by making data collection smarter and better oriented. In this paper, we propose an attention-based data collection method, inspired by self-awareness, and exploit its potential in the context of Multi-Processor System-on-Chips (MPSoCs). Our case study shows that this method can reduce the computation and communication load related to processing sensory data up to 95%, at the cost of a negligible overhead at the sensor node.
@INPROCEEDINGS{7946814, author={Götzinger, Maximilian and TaheriNejad, Nima and Kholerdi, Hedyeh A. and Jantsch, Axel}, booktitle={2017 IEEE 30th Canadian Conference on Electrical and Computer Engineering (CCECE)}, title={On the design of context-aware health monitoring without a priori knowledge; an {AC}-Motor case-study}, year={2017}, pages={1--5}, doi={10.1109/CCECE.2017.7946814}}
Health monitoring without a priori knowledge can save a significant amount of design and implementation time. However, for smaller devices with limited available resources, this is not feasible using most conventional methods. For small footprint sensor and actuator devices, we propose a health monitoring architecture and algorithm, which uses context-awareness to assess the health status of an “Injective-function Black-Box” without having a priori knowledge about it. The proposed algorithm can identify normal modes of operation, change of states (operation modes), deviation from a state, and abnormal functional operation. We have tested the algorithm on an AC Motor where the system was able to identify its health and changes in the operation status accordingly.
@INPROCEEDINGS{7946813, author={Rohani, Shokat Ganjeheizadeh and TaheriNejad, Nima}, booktitle={2017 IEEE 30th Canadian Conference on Electrical and Computer Engineering (CCECE)}, title={An improved algorithm for {IMPLY} logic based memristive Full-adder}, year={2017}, pages={1--4}, doi={10.1109/CCECE.2017.7946813}}
Memristor characteristics like high speed, low power, and passive memory retention make it suitable for application in several fields. Neuromorphic systems, digital and analog circuits, or simply a memory unit, are some of these applications. A memristor exhibits different properties, which allow each field of application to take the desired advantages of this device. Memristor-based “Implication”, which implements the IMPLY logic using material properties of memristors, makes all logic operations possible in designs, which consists of only memristors as fundamental constituents. The focus of this article is on memristor-based Full-adder, which uses only the IMPLY logic. We propose an algorithm with the merit of needing fewer execution steps and a smaller number of memristors.
@INPROCEEDINGS{7927146, author={Anzanpour, Arman and Azimi, Iman and Götzinger, Maximilian and Rahmani, Amir M. and TaheriNejad, Nima and Liljeberg, Pasi and Jantsch, Axel and Dutt, Nikil}, booktitle={Design, Automation \& Test in Europe Conference \& Exhibition (DATE), 2017}, title={Self-awareness in remote health monitoring systems using wearable electronics}, year={2017}, pages={1056--1061}, doi={10.23919/DATE.2017.7927146}}
In healthcare, effective monitoring of patients plays a key role in detecting health deterioration early enough. Many signs of deterioration exist as early as 24 hours prior having a serious impact on the health of a person. As hospitalization times have to be minimized, in-home or remote early warning systems can fill the gap by allowing in-home care while having the potentially problematic conditions and their signs under surveillance and control. This work presents a remote monitoring and diagnostic system that provides a holistic perspective of patients and their health conditions. We discuss how the concept of self-awareness can be used in various parts of the system such as information collection through wearable sensors, confidence assessment of the sensory data, the knowledge base of the patient's health situation, and automation of reasoning about the health situation. Our approach to self-awareness provides (i) situation awareness to consider the impact of variations such as sleeping, walking, running, and resting, (ii) system personalization by reflecting parameters such as age, body mass index, and gender, and (iii) the attention property of self-awareness to improve the energy efficiency and dependability of the system via adjusting the priorities of the sensory data collection. We evaluate the proposed method using a full system demonstration.
With the advancement of technology, non-intrusive monitoring of some physiological signals through smart watches and other wearable devices are made possible. This provides us with new opportunities of exploring newer fields of information technology applied in our everyday lives. One application which can help individuals with difficulty in expressing their emotions, e.g. autistic individuals, is emotion recognition through bio-signal processing. To develop such systems, however, a significant amount of measurement data is necessary to establish proper paradigms, which enable such analyses. Given the sparsity of the available data in the literature, specifically the ones using portable devices, we conducted a set of experiments to help in enriching the literature. In our experiments, we measured physiological signals of various subjects during four different emotional experiences; happiness, sadness, pain, and anger. Measured bio-signals are Electrodermal activity (EDA), Skin Temperature, and Heart rate. In this paper, we share our measurement results and our findings regarding their relation with happiness, sadness, anger, and pain.
With the advancement of technology, non-intrusive monitoring of some physiological signals through smart watches and other wearable devices are made possible. This provides us with new opportunities of exploring newer fields of information technology applied in our everyday lives. One application which can help individuals with difficulty in expressing their emotions, e.g. autistic individuals, is emotion recognition through bio-signal processing. To develop such systems, however, a significant amount of measurement data is necessary to establish proper paradigms, which enable such analyses. Given the sparsity of the available data in the literature, specifically the ones using portable devices, we conducted a set of experiments to help in enriching the literature. In our experiments, we measured physiological signals of various subjects during four different emotional experiences; happiness, sadness, pain, and anger. Measured bio-signals are Electrodermal activity (EDA), Skin Temperature, and Heart rate. In this paper, we share our measurement results and our findings regarding their relation with happiness, sadness, anger, and pain.
@INPROCEEDINGS{7751193, author={Taherinejad, Nima and Sai, Manoj P. D. and Rathmair, Michael and Jantsch, Axel}, booktitle={2016 13th International Conference on Electrical Engineering, Computing Science and Automatic Control (CCE)}, title={Fully digital write-in scheme for multi-bit memristive storage}, year={2016}, pages={1--6}, doi={10.1109/ICEEE.2016.7751193}}
Memristors have been used in various applications, including single- and multi-bit storage units. The non-linear voltage-current relation in memristors is often seen as a problem, necessitating complex circuits and methods for a reliable write-in. In this paper, we take advantage of this phenomenon for storing more than one bit of information in a single memristor using digital bit streams. First, we demonstrate how two bits of information can be stored and read back from a single memristor unit. Then, we propose encoding schemes that can enhance the reliability of digitally writing two and three bits of data in a single memristor. To verify the reliability of this method for multi-bit data storage, we have run simulations based on the most prominent simulation models available.
Unfit drivers are the cause of tens of thousands of incidents on the roads which lead to injuries and deaths. Therefore, it is very important to take preventive measures against such incidents. One of the unfit driving conditions is driving while being drowsy. Using image processing techniques, drowsiness of the driver could be detected and hence such incidents could be prevented. In this work, inspired by how images are processed by the human visual system, an enhancement for driver's drowsiness detection is suggested. Furthermore, to improve the robustness of the drowsiness detection system, the mechanism for using energy levels in frames is changed. Lastly, a better decision making process is proposed. To measure the merit of the system, it is applied to a set of drivers' data. Test results show that using the proposed system, success rate of the drowsiness detection system is 90%.
@INPROCEEDINGS{7579868, author={Taherinejad, Nima and Manoj, P. D. Sai and Jantsch, Axel}, booktitle={2015 IEEE European Modelling Symposium (EMS)}, title={Memristors' Potential for Multi-bit Storage and Pattern Learning}, year={2015}, pages={450--455}, doi={10.1109/EMS.2015.73}}
Memristor is a two-terminal device, termed as fourth element, and characterized by a varying resistance depending on the charge (current) flown through it. This leads to many interesting characteristics, including a memory of its past states, demonstrated in its resistance. Smaller area and power consumed by memristors compared to conventional memories makes them a more suitable choice for applications needing large memory. In this paper we explore one of the unique properties of memristors which extends their suitability by allowing storage of multi-bit data in a single memristor. Their ability of storing multi-bit patterns will be shown via a simplified proof and simulations. This characteristic can be advantageous for many applications. In this paper particularly, we briefly discuss its advantages in pattern learning applications.
@ARTICLE{7029097, author={Sheng, Zhengguo and Kenarsari-Anhari, Amir and Taherinejad, Nima and Leung, Victor C. M.}, journal={IEEE Transactions on Vehicular Technology}, title={A Multichannel Medium Access Control Protocol for Vehicular Power Line Communication Systems}, year={2016}, volume={65}, number={2}, pages={542--554}, doi={10.1109/TVT.2015.2397859}}
In-vehicle communications are emerging to play an important role in the continued development of reliable and efficient X-by-Wire applications in new vehicles. Since vehicle devices, sensors, and the electronic control unit (ECU) are already connected to power wires, the advancement of power line communications (PLCs) can provide a very low cost and virtually free platform for in-vehicle communications. In this paper, we propose a medium access control (MAC) protocol for vehicular PLC systems, where multiple nodes are competing for transmission over the direct current (dc) power line. The proposed protocol uses a combination of time and frequency multiplexing and consists of two key features: 1) a distributed channel selection policy to arbitrate packet transmission across different channels and provide robustness against interference and noise and 2) a distributed collision resolution algorithm to allow efficient nodes completion over selected channels. Specifically, the collision resolution algorithm is optimized with respect to the channel policy such that the success probability of transmission in each channel is maximized. Numerical results are also supplemented to validate the performance of the proposed protocol and provide useful guidelines for developing a robust contention-based MAC protocol for vehicular PLC systems.
@INPROCEEDINGS{6812364, author={Taherinejad, Nima and Lampe, Lutz and Mirabbasi, Shahriar}, booktitle={18th IEEE International Symposium on Power Line Communications and Its Applications}, title={Adaptive impedance matching for Vehicular Power Line Communication systems}, year={2014}, pages={214--219}, doi={10.1109/ISPLC.2014.6812364}}
The growing number of electronic devices inside vehicles has motivated research and development activities in Vehicular Power Line Communication (VPLC) systems. Advantages of the VPLC approach include reduced complexity and cost of the wiring harness. Among the design challenges of VPLC systems is the problem of impedance matching. The access impedance at the modem port is a time varying quantity which also depends on the location of the VPLC modem. Impedance mismatch degrades the signal-to-noise ratio (SNR) and thus the signal integrity. Given the variable nature of the access impedance, a fixed matching circuit will be inefficient. A potential solution to cope with the access impedance variability is an adaptive impedance matching system which is the subject of this work. Here we have designed an adaptive impedance matching system. The system is simulated and its performance is evaluated under extreme changes in access impedance.
@INPROCEEDINGS{6201294, author={Taherinejad, Nima and Rosales, Roberto and Mirabbasi, Shahriar and Lampe, Lutz}, booktitle={2012 IEEE International Symposium on Power Line Communications and Its Applications}, title={On the design of impedance matching circuits for vehicular power line communication systems}, year={2012}, pages={322--327}, doi={10.1109/ISPLC.2012.6201294}}
The design of power line communication (PLC) systems for vehicles, i.e., vehicular power line communication (VPLC), is a challenging task as propagation conditions are harsh and devices need to be low cost and highly integrated (to have minimal overhead on vehicle's cost and weight). One particular challenge, which is common to many PLC application scenarios, is the temporal and spatial variation of the input impedance. In this paper, we investigate on this issue and, based on previous studies and measurements on access impedances for a PLC network in a car, we discuss the design of adaptive impedance matching circuits for VPLC. This includes a study on frequency range of operation, suggestions for impedance matching circuits, and proposing a circuit structure. In particular, since inductors are an integral part of matching circuitry and given that over the typical frequency range of operation for VPLC their integration is challenging, if not impractical, we advocate the use of active inductors in matching circuits. As compared to passive inductors, they occupy a smaller on-chip size and their inductance is adjustable. We also propose an active inductor structure that provides a wide range of inductance values which are suitable for VPLC applications.
@INPROCEEDINGS{6201336, author={Taherinejad, Nima and Rosales, Roberto and Lampe, Lutz and Mirabbasi, Shahriar}, booktitle={2012 IEEE International Symposium on Power Line Communications and Its Applications}, title={Channel characterization for power line communication in a hybrid electric vehicle}, year={2012}, pages={328--333}, doi={10.1109/ISPLC.2012.6201336}}
In today's electric and conventional combustion engine vehicles, data communication between electronic control units is accomplished by sending communication signals over dedicated wires. The space requirement, weight, and installation costs for these wires can become significant, especially in electric vehicles (EVs) of the future, which are highly sophisticated electronic systems. The concept of reusing existing electricity wires, which are needed to power electronic components, for data communication, i.e., vehicular power line communications (V-PLC), is thus a promising means to reduce the amount of dedicated wiring and/or establish redundant communication buses especially for EVs. Previous work on V-PLC has mostly focused on combustion engine vehicles. In this paper, we present the methodology and results from a measurement campaign with the goal of characterizing the transmission conditions for V-PLC in a hybrid EV (HEV). Emphasis is given to the choice of measurement points (potential nodes of a V-PLC network) and the proper design of adapters for measurement equipment. The results presented here focus on channel transfer function and access impedance.
@INPROCEEDINGS{5764438, author={Taherinejad, Nima and Rosales, Roberto and Mirabbasi, Shahriar and Lampe, Lutz}, booktitle={2011 IEEE International Symposium on Power Line Communications and Its Applications}, title={A study on access impedance for vehicular power line communications}, year={2011}, pages={440--445}, doi={10.1109/ISPLC.2011.5764438}}
In this paper, we present an experimental study of access impedance for vehicular power line communications (VPLC). This study aims to provide better understanding of the effects of the vehicle loads on the impedance of the VPLC plug-in access nodes. Also, some insights for the design of adaptive impedance matching are provided. We report impedance measurements of various loads and car battery, which combined with input impedance measurements from a specific sport car are used to calculate an equivalent access impedance of a PLC device connected in parallel. We show how the inductive nature of the loads dominates the overall access impedance making it on one hand more attractive for transmission in the high MHz range, and on the other more challenging for the implementation of impedance matching.
@INPROCEEDINGS{5275072, author={Taherinejad, Nima and Abrishamifar, Adib}, booktitle={2009 European Conference on Circuit Theory and Design}, title={A new high speed, low power adder; using hybrid analog-digital circuits}, year={2009}, pages={623--626}, doi={10.1109/ECCTD.2009.5275072}}
In this paper a new high speed and low power adder is presented. The circuit uses a hybrid concept of analog and digital circuit design to propagate the carry and so achieve a Full Adder with 78 ps delay and 7.26 μW of power consumption. SPICE simulations performed on the 0.18 μm TSMC technology demonstrate an average improvement of 159%, 184% and 516%, respectively, for delay, power consumption and PDP.
@INPROCEEDINGS{5275109, author={Taherinejad, Nima}, booktitle={2009 European Conference on Circuit Theory and Design}, title={Highly reliable harmony search algorithm}, year={2009}, pages={818--822}, doi={10.1109/ECCTD.2009.5275109}}
In this paper, after a literature overview, studies will be concentrated on the pitch adjustment ratio function of the harmony search algorithm. A more rational function will be proposed which increases the robustness of the algorithm and therefore leads to a highly reliable algorithm. Simulations on a set of standard TSP problems demonstrate that the parameter of reliability (variance over average) has experienced 75% of improvement. The cost paid for this considerable improvement is completely negligible: a 0.7% decrease in quality of responses.
Two new efficient and robust ant colony algorithms are proposed. These algorithms contain two new and reasonable local updating rules that make them more efficient and robust. While going forward from start point to end point of a tour, the ants’ freedom to make local changes on links is gradually restricted. This idea is implemented in two different forms, leaving two new algorithms, KCC-Ants and ELU-Ants. To evaluate the new algorithms, we run them along with the old one on the standard TSP library, where in almost all of the cases the proposed algorithms had better solutions and even for some problem samples found the optimal solution.
@INPROCEEDINGS{4600890, author={Taherinejad, Nima and Shah-Hosseini, H. S.}, booktitle={2008 15th International Conference on Mixed Design of Integrated Circuits and Systems}, title={A new digital multiplier/divider architecture, via hybrid analog-digital processing}, year={2008}, pages={181--186}}
Digital multiplication and division are time-consuming tasks if they are done via software. To speed them up, special hardware may be developed for the processor. Digital multipliers/dividers designed with digital circuits are too large and complex from a system point of view, and each of them needs its own hardware, so they occupy a large amount of die area. In this paper, a digital multiplier/divider which uses hybrid processes of analog and digital circuits will be introduced. This multiplier/divider is very simple and uses the same device for both operations, so it will occupy less die area. The idea will be developed at the system level, and some comparisons with similar devices will be made at this level.
@INPROCEEDINGS{4728280, author={Taherinejad, Nima and Naimi, Hossein Miar}, booktitle={2007 IEEE International Conference on Signal Processing and Communications}, title={Robust and Efficient Ant Colony Algorithm; Using New Local Updating Rule}, year={2007}, pages={161--164}, doi={10.1109/ICSPC.2007.4728280}}
In this paper, two new robust ant colony algorithms with better results will be presented. The main approach to improve older algorithms is to use an intelligent local updating method. Here, not all agents have the same effect on the paths; local updating is done based on the situation and the route passed by the agents. In order to evaluate and compare the results of the new algorithm, many standard problems of the TSP Library and some random problems were tested. The experiments proved the better results of the new algorithm and also its considerably better robustness.