2023
Journal Articles
Adrian-Gabriel Chifu; Sébastien Fournier
Sentiment Difficulty in Aspect-Based Sentiment Analysis Journal Article
In: Mathematics, vol. 11, no. 22, 2023, ISSN: 2227-7390.
Abstract | Links | BibTeX | Tags: Difficulty, Sentiment Analysis
@article{math11224647,
  title     = {Sentiment Difficulty in Aspect-Based Sentiment Analysis},
  author    = {Adrian-Gabriel Chifu and Sébastien Fournier},
  url       = {https://www.mdpi.com/2227-7390/11/22/4647},
  doi       = {10.3390/math11224647},
  issn      = {2227-7390},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  journal   = {Mathematics},
  volume    = {11},
  number    = {22},
  pages     = {4647},
  abstract  = {Subjectivity is a key aspect of natural language understanding, especially in the context of user-generated text and conversational systems based on large language models. Natural language sentences often contain subjective elements, such as opinions and emotions, that make them more nuanced and complex. The level of detail at which the study of the text is performed determines the possible applications of sentiment analysis. The analysis can be done at the document or paragraph level, or, even more granularly, at the aspect level. Many researchers have studied this topic extensively. The field of aspect-based sentiment analysis has numerous data sets and models. In this work, we initiate the discussion around the definition of sentence difficulty in this context of aspect-based sentiment analysis. To assess and quantify the difficulty of the aspect-based sentiment analysis, we conduct an experiment using three data sets: “Laptops”, “Restaurants”, and “MTSC” (Multi-Target-dependent Sentiment Classification), along with 21 learning models from scikit-learn. We also use two textual representations, TF-IDF (Terms frequency-inverse document frequency) and BERT (Bidirectional Encoder Representations from Transformers), to analyze the difficulty faced by these models in performing aspect-based sentiment analysis. Additionally, we compare the models with a fine-tuned version of BERT on the three data sets. We identify the most challenging sentences using a combination of classifiers in order to better understand them. We propose two strategies for defining sentence difficulty. The first strategy is binary and considers sentences as difficult when the classifiers are unable to correctly assign the sentiment polarity. The second strategy uses a six-level difficulty scale based on how many of the top five best-performing classifiers can correctly identify sentiment polarity. These sentences with assigned difficulty classes are then used to create predictive models for early difficulty detection. The purpose of estimating the difficulty of aspect-based sentiment analysis is to enhance performance while minimizing resource usage.},
  keywords  = {Difficulty, Sentiment Analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences
Mihaela Gaman; Adrian-Gabriel Chifu; William Domingues; Radu-Tudor Ionescu
FreCDo: A Large Corpus for French Cross-Domain Dialect Identification Conference
27th International Conference on Knowledge-Based and Intelligent Information & Engineering Systems (KES 2023), KES International, Athens, Greece, 2023.
Abstract | Links | BibTeX | Tags: Cross-Domain Evaluation, dialect identification, French Corpus
@conference{Gaman2023,
  title     = {FreCDo: A Large Corpus for French Cross-Domain Dialect Identification},
  author    = {Mihaela Gaman and Adrian-Gabriel Chifu and William Domingues and Radu-Tudor Ionescu},
  editor    = {{KES International}},
  url       = {https://adrianchifu.com/wp-content/uploads/2023/09/k23-050.pdf},
  year      = {2023},
  date      = {2023-09-05},
  urldate   = {2023-09-05},
  booktitle = {27th International Conference on Knowledge-Based and Intelligent Information \& Engineering Systems (KES 2023)},
  publisher = {KES International},
  address   = {Athens, Greece},
  abstract  = {We present a novel corpus for French dialect identification comprising 413,522 French text samples collected from public news websites in Belgium, Canada, France and Switzerland. To ensure an accurate estimation of the dialect identification performance of models, we designed the corpus to eliminate potential biases related to topic, writing style, and publication source. More precisely, the training, validation and test splits are collected from different news websites, while searching for different keywords (topics). This leads to a French cross-domain (FreCDo) dialect identification task. We conduct experiments with four competitive baselines, a fine-tuned CamemBERT model, an XGBoost based on fine-tuned CamemBERT features, a Support Vector Machines (SVM) classifier based on fine-tuned CamemBERT features, and an SVM based on word n-grams. Aside from presenting quantitative results, we also make an analysis of the most discriminative features learned by CamemBERT.},
  keywords  = {Cross-Domain Evaluation, dialect identification, French Corpus},
  pubstate  = {published},
  tppubtype = {conference}
}
2022
Journal Articles
Ismail Badache; Adrian-Gabriel Chifu; Sébastien Fournier
Unsupervised and Supervised Methods to Estimate Temporal-Aware Contradictions in Online Course Reviews Journal Article
In: Mathematics, vol. 10, no. 5, 2022.
Abstract | Links | BibTeX | Tags: Aspect Detection, Contradiction Intensity, Feature Evaluation, Rating, Sentiment Analysis, Temporality
@article{badache2022,
  title     = {Unsupervised and Supervised Methods to Estimate Temporal-Aware Contradictions in Online Course Reviews},
  author    = {Ismail Badache and Adrian-Gabriel Chifu and Sébastien Fournier},
  publisher = {MDPI},
  url       = {https://www.mdpi.com/2227-7390/10/5/809},
  doi       = {10.3390/math10050809},
  issn      = {2227-7390},
  year      = {2022},
  date      = {2022-03-03},
  urldate   = {2022-03-03},
  journal   = {Mathematics},
  volume    = {10},
  number    = {5},
  pages     = {809},
  abstract  = {The analysis of user-generated content on the Internet has become increasingly popular for a wide variety of applications. One particular type of content is represented by the user reviews for programs, multimedia, products, and so on. Investigating the opinion contained by reviews may help in following the evolution of the reviewed items and thus in improving their quality. Detecting contradictory opinions in reviews is crucial when evaluating the quality of the respective resource. This article aims to estimate the contradiction intensity (strength) in the context of online courses (MOOC). This estimation was based on review ratings and on sentiment polarity in the comments, with respect to specific aspects, such as “lecturer”, “presentation”, etc. Between course sessions, users stop reviewing, and also, the course contents may evolve. Thus, the reviews are time dependent, and this is why they should be considered grouped by the course sessions. Having this in mind, the contribution of this paper is threefold: (a) defining the notion of subjective contradiction around specific aspects and then estimating its intensity based on sentiment polarity, review ratings, and temporality; (b) developing a dataset to evaluate the contradiction intensity measure, which was annotated based on a user study; (c) comparing our unsupervised method with supervised methods with automatic feature selection, over the dataset. The dataset collected from coursera.org is in English. It includes 2244 courses and 73,873 user-generated reviews of those courses. The results proved that the standard deviation of the ratings, the standard deviation of the polarities, and the number of reviews are suitable features for predicting the contradiction intensity classes. Among the supervised methods, the J48 decision trees algorithm yielded the best performance, compared to the naive Bayes model and the SVM model.},
  keywords  = {Aspect Detection, Contradiction Intensity, Feature Evaluation, Rating, Sentiment Analysis, Temporality},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences
Yann Duperis; Adrian-Gabriel Chifu; Bernard Espinasse; Sébastien Fournier; Arthur Kuehn
Deep Unordered Composition for Multi-label Classification applied to Skills Prediction Conference
Joint Conference of the Information Retrieval Communities in Europe CIRCLE 2022, Samatan, France, 2022.
Abstract | Links | BibTeX | Tags: Job recommender system, Natural Language Processing, Neural Networks
@conference{duperis2022,
  author    = {Yann Duperis and Adrian-Gabriel Chifu and Bernard Espinasse and Sébastien Fournier and Arthur Kuehn},
  title     = {Deep Unordered Composition for Multi-label Classification applied to Skills Prediction},
  booktitle = {Joint Conference of the Information Retrieval Communities in Europe CIRCLE 2022},
  address   = {Samatan, France},
  year      = {2022},
  date      = {2022-07-04},
  url       = {http://ceur-ws.org/Vol-3178/CIRCLE_2022_paper_16.pdf},
  urldate   = {2022-07-04},
  keywords  = {Job recommender system, Natural Language Processing, Neural Networks},
  abstract  = {Today, many recruitment processes are digitalized. Job offers are posted on job boards and candidates apply by submitting their resumes. To select an appropriate candidate for a job, recruiters rely mostly on the evaluation of the professional skills of the individual. However, researches have shown that individuals tend to omit some skills from their professional profile. A human recruiter, knowledgeable in a given activity sector, is often able to fill the gaps and infer the missing skills. In this paper our aim is to support this human recruiter by automatically inferring theses missing skills, a non-trivial task. To solve this task, first we propose a method to tackle the skill prediction problem by transforming it from a multi-label classification task it to a binary classification task. Then we implement this method with a deep learning model inspired by the Deep Unordered Composition approach. Two different variants of this model, one with the Deep Averaging Network architecture and the other with the Set-Transformer architecture, are evaluated on an open IT resumes data set, and the results are promising.},
  pubstate  = {published},
  tppubtype = {conference}
}
Igor Nascimento; Rinaldo Lima; Adrian Chifu; Bernard Espinasse; Sébastien Fournier
DeepREF: A Framework for Optimized Deep Learning-based Relation Classification Conference
Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022), European Language Resources Association (ELRA), Marseille, France, 2022.
Abstract | Links | BibTeX | Tags: DDI, DeepREF, Embeddings, Framework, NLP, Optuna, Relation Classification, SemEval
@conference{ChifuLREC2022,
  title     = {DeepREF: A Framework for Optimized Deep Learning-based Relation Classification},
  author    = {Igor Nascimento and Rinaldo Lima and Adrian Chifu and Bernard Espinasse and Sébastien Fournier},
  url       = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.480.pdf},
  year      = {2022},
  date      = {2022-06-20},
  urldate   = {2022-06-20},
  booktitle = {Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022)},
  pages     = {4513--4522},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marseille, France},
  abstract  = {Relation Extraction (RE) is an important basic Natural Language Processing (NLP) task for many applications, including search engines and question-answering systems. There are many studies in this subarea of NLP that continue to be explored, such as the ones concerned by SemEval shared tasks. For many years, several RE systems based on statistical models have been proposed, as well as the frameworks to develop them. We focus on frameworks allowing to develop such RE systems using deep learning models. Such frameworks make it possible to reproduce experiments using many deep learning models and preprocessing techniques. Currently, there are very few frameworks of this type. In this paper, we propose an open and optimizable framework called DeepREF, inspired by two other existing frameworks: OpenNRE and REflex. DeepREF allows the rapid development of deep learning models for Relation Classification (RC). In addition, it enables hyperparameter optimization, and the application of many preprocessing techniques on the input textual data. DeepREF provides means to boost the process of running deep learning models for RC tasks on different datasets and models. DeepREF is evaluated on three reference corpora and has demonstrated competitive results compared to other state-of-the-art RC systems.},
  keywords  = {DDI, DeepREF, Embeddings, Framework, NLP, Optuna, Relation Classification, SemEval},
  pubstate  = {published},
  tppubtype = {conference}
}
Proceedings
Noëmi Aepli; Antonios Anastasopoulos; Adrian-Gabriel Chifu; William Domingues; Fahim Faisal; Mihaela Gaman; Radu Tudor Ionescu; Yves Scherrer
Findings of the VarDial Evaluation Campaign 2022 Proceedings
Association for Computational Linguistics, Gyeongju, Republic of Korea, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{aepli-etal-2022-findings,
  title     = {Findings of the VarDial Evaluation Campaign 2022},
  author    = {Noëmi Aepli and Antonios Anastasopoulos and Adrian-Gabriel Chifu and William Domingues and Fahim Faisal and Mihaela Gaman and Radu Tudor Ionescu and Yves Scherrer},
  url       = {https://aclanthology.org/2022.vardial-1.1},
  year      = {2022},
  date      = {2022-10-01},
  urldate   = {2022-10-01},
  booktitle = {Proceedings of the Ninth Workshop on NLP for Similar Languages, Varieties and Dialects},
  pages     = {1--13},
  publisher = {Association for Computational Linguistics},
  address   = {Gyeongju, Republic of Korea},
  abstract  = {This report presents the results of the shared tasks organized as part of the VarDial Evaluation Campaign 2022. The campaign is part of the ninth workshop on Natural Language Processing (NLP) for Similar Languages, Varieties and Dialects (VarDial), co-located with COLING 2022. Three separate shared tasks were included this year: Identification of Languages and Dialects of Italy (ITDI), French Cross-Domain Dialect Identification (FDI), and Dialectal Extractive Question Answering (DialQA). All three tasks were organized for the first time this year.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2021
Conferences
Radu-Tudor Ionescu; Adrian-Gabriel Chifu
FreSaDa: A French Satire Data Set for Cross-Domain Satire Detection Conference
The International Joint Conference on Neural Networks, IJCNN 2021, IJCNN2021 2021.
Abstract | Links | BibTeX | Tags: Cross-Domain Evaluation, Satire Detection, Text Classification, Unsupervised Domain Adaptation
@conference{IonescuChifu2021IJCNN,
  title      = {FreSaDa: A French Satire Data Set for Cross-Domain Satire Detection},
  author     = {Radu-Tudor Ionescu and Adrian-Gabriel Chifu},
  url        = {https://arxiv.org/abs/2104.04828},
  eprint     = {2104.04828},
  eprinttype = {arXiv},
  year       = {2021},
  date       = {2021-07-18},
  urldate    = {2021-07-18},
  booktitle  = {The International Joint Conference on Neural Networks, IJCNN 2021},
  series     = {IJCNN2021},
  abstract   = {In this paper, we introduce FreSaDa, a French Satire Data Set, which is composed of 11,570 articles from the news domain. In order to avoid reporting unreasonably high accuracy rates due to the learning of characteristics specific to publication sources, we divided our samples into training, validation and test, such that the training publication sources are distinct from the validation and test publication sources. This gives rise to a cross-domain (cross-source) satire detection task. We employ two classification methods as baselines for our new data set, one based on low-level features (character n-grams) and one based on high-level features (average of CamemBERT word embeddings). As an additional contribution, we present an unsupervised domain adaptation method based on regarding the pairwise similarities (given by the dot product) between the training samples and the validation samples as features. By including these domain-specific features, we attain significant improvements for both character n-grams and CamemBERT embeddings.},
  keywords   = {Cross-Domain Evaluation, Satire Detection, Text Classification, Unsupervised Domain Adaptation},
  pubstate   = {published},
  tppubtype  = {conference}
}
Yann Duperis; Adrian-Gabriel Chifu; Bernard Espinasse; Sébastien Fournier; Arthur Kuehn
Vers un système de recommandation de profils experts dans l'industrie des procédés Conference
COnférence en Recherche d'Informations et Applications, CORIA2021, Grenoble, France (virtuel), 2021.
Abstract | Links | BibTeX | Tags: Expert search, Job recommender system, Semantic web
@conference{Duperis2021,
  title     = {Vers un système de recommandation de profils experts dans l'industrie des procédés},
  author    = {Yann Duperis and Adrian-Gabriel Chifu and Bernard Espinasse and Sébastien Fournier and Arthur Kuehn},
  url       = {http://coria.asso-aria.org/2021/articles/long_12/main.pdf},
  year      = {2021},
  date      = {2021-04-15},
  urldate   = {2021-04-15},
  booktitle = {COnférence en Recherche d'Informations et Applications},
  address   = {Grenoble, France (virtuel)},
  series    = {CORIA2021},
  abstract  = {La dématérialisation des processus de recrutement n'a pas fait disparaître toutes les frictions inhérentes à cette activité. La recherche automatisée d'un candidat idéal se heurte toujours à la difficulté à modéliser correctement les besoins exprimés en langage naturel dans une offre d’emploi. Le recrutement d’experts, notamment, est particulièrement difficile. En effet, ces profils concernent une proportion réduite des recrutements et leur prise en charge informatisée nécessite une connaissance précise du secteur d’activité concerné. Dans cet article, nous proposons l’architecture d’un système de recommandation de profils experts dans l’industrie des procédés afin d’assister ce type de recrutements.},
  keywords  = {Expert search, Job recommender system, Semantic web},
  pubstate  = {published},
  tppubtype = {conference}
}
Proceedings
Antoine Doucet; Adrian-Gabriel Chifu (Ed.)
COnférence en Recherche d'Informations et Applications - CORIA 2021, French Information Retrieval Conference, Grenoble, France, April 15, 2021 Proceedings
ARIA, 2021.
BibTeX | Tags:
@proceedings{DBLP:conf/coria/2021,
  editor    = {Antoine Doucet and Adrian-Gabriel Chifu},
  title     = {COnférence en Recherche d'Informations et Applications - CORIA 2021, French Information Retrieval Conference, Grenoble, France, April 15, 2021},
  publisher = {ARIA},
  year      = {2021},
  date      = {2021-04-15},
  urldate   = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {proceedings}
}
2020
Conferences
Adrian Chifu; Josiane Mothe; Md Zia Ullah
Fair Exposure of Documents in Information Retrieval: a Community Detection Approach Conference
Joint Conference of the Information Retrieval Communities in Europe, CIRCLE2020 2020.
Abstract | Links | BibTeX | Tags: Document Communities, Document Network, Document Re-ranking, Fair Document Exposure, Information Retrieval, Information Systems
@conference{Chifu2020CIRCLE,
  title     = {Fair Exposure of Documents in Information Retrieval: a Community Detection Approach},
  author    = {Adrian Chifu and Josiane Mothe and Md Zia Ullah},
  url       = {https://www.irit.fr/CIRCLE/wp-content/uploads/2020/06/CIRCLE20_03.pdf},
  year      = {2020},
  date      = {2020-07-01},
  booktitle = {Joint Conference of the Information Retrieval Communities in Europe},
  series    = {CIRCLE2020},
  abstract  = {While (mainly) designed to answer users’ needs, search engines and recommendation systems do not necessarily guarantee the exposure of the data they store and index while it can be essential for information providers. A recent research direction so called “fair” exposure of documents tackles this problem in information retrieval. It has mainly been cast into a re-ranking problem with constraints and optimization functions. This paper presents the first steps toward a new framework for fair document exposure. This framework is based on document linking and document community detection; communities are used to rank the documents to be retrieved according to an information need. In addition to the first step of this new framework, we present its potential through both a toy example and a few illustrative examples from the 2019 TREC Fair Ranking Track data set.},
  keywords  = {Document Communities, Document Network, Document Re-ranking, Fair Document Exposure, Information Retrieval, Information Systems},
  pubstate  = {published},
  tppubtype = {conference}
}