2019
Journal Articles

Ismaïl Badache; Sébastien Fournier; Adrian Chifu
Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ? Journal Article
In: Le Bulletin de l'Association Française pour l'Intelligence Artificielle (AFIA 2019), 2019.
Abstract | Links | BibTeX | Tags: Aspect Detection, Contradiction Intensity, Criteria Evaluation, Sentiment Analysis
@comment{Journal article, AFIA bulletin, 2019. French title; the stored abstract is in English.}
@article{Badache2019AFIA,
  author    = {Ismaïl Badache and Sébastien Fournier and Adrian Chifu},
  title     = {Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ?},
  journal   = {Le Bulletin de l'Association Française pour l'Intelligence Artificielle (AFIA 2019)},
  year      = {2019},
  date      = {2019-12-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01872267/document},
  abstract  = {Reviews on web resources (e.g. courses, movies) become increasingly exploited in text analysis tasks (e.g. opinion detection, controversy detection). This paper investigates contradiction intensity in reviews exploiting different features such as variation of ratings and variation of polarities around specific entities (e.g. aspects, topics). Firstly, aspects are identified according to the distributions of the emotional terms in the vicinity of the most frequent nouns in the reviews collection. Secondly, the polarity of each review segment containing an aspect is estimated. Only resources containing these aspects with opposite polarities are considered. Finally, some features are evaluated, using feature selection algorithms, to determine their impact on the effectiveness of contradiction intensity detection. The selected features are used to learn some state-of-the-art learning approaches. The experiments are conducted on the Massive Open Online Courses data set containing 2244 courses and their 73,873 reviews, collected from coursera.org. Results showed that variation of ratings, variation of polarities, and reviews quantity are the best predictors of contradiction intensity. Also, J48 was the most effective learning approach for this type of classification.},
  keywords  = {Aspect Detection, Contradiction Intensity, Criteria Evaluation, Sentiment Analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences

Josiane Mothe; Léa Laporte; Adrian-Gabriel Chifu
Predicting Query Difficulty in IR: Impact of Difficulty Definition Conference
2019 11th International Conference on Knowledge and Systems Engineering, KSE2019 IEEE 2019.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Difficulty Prediction, Query Features
@comment{Conference paper, KSE 2019. The acronym IR is braced in the title so that sentence-casing bibliography styles keep it in capitals.}
@conference{mothe2019predicting,
  author       = {Josiane Mothe and Léa Laporte and Adrian-Gabriel Chifu},
  title        = {Predicting Query Difficulty in {IR}: Impact of Difficulty Definition},
  booktitle    = {2019 11th International Conference on Knowledge and Systems Engineering},
  series       = {KSE2019},
  pages        = {1--6},
  organization = {IEEE},
  year         = {2019},
  date         = {2019-10-24},
  url          = {https://www.irit.fr/publis/SIG/2019_KSE_MLC.pdf},
  urldate      = {2019-01-01},
  abstract     = {While it exists information on about any topic on the web, we know from information retrieval (IR) evaluation programs that search systems fail to answer to some queries in an effective manner. System failure is associated to query difficulty in the IR literature. However, there is no clear definition of query difficulty. This paper investigates several ways of defining query difficulty and analyses the impact of these definitions on query difficulty prediction results. Our experiments show that the most stable definition across collections is a threshold-based definition of query difficulty classes.},
  keywords     = {Information Retrieval, Query Difficulty Prediction, Query Features},
  pubstate     = {published},
  tppubtype    = {conference}
}

Adrian-Gabriel Chifu
The R2I_LIS Team Proposes Majority Vote for VarDial’s MRC Task Conference
Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects, VARDIAL2019 2019.
Abstract | Links | BibTeX | Tags: Competition, Dialect Classification, Feature Engineering, Majority Vote
@comment{Workshop paper, VarDial 2019. In the title the underscore is escaped because a bare underscore is a LaTeX error outside math mode, and the team name, VarDial and MRC are braced so bibliography styles do not change their case.}
@conference{chifu2019r2i_lis,
  author    = {Adrian-Gabriel Chifu},
  title     = {The {R2I\_LIS} Team Proposes Majority Vote for {VarDial’s} {MRC} Task},
  booktitle = {Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects},
  series    = {VARDIAL2019},
  pages     = {138--143},
  year      = {2019},
  date      = {2019-06-01},
  url       = {https://www.aclweb.org/anthology/W19-1414.pdf},
  urldate   = {2019-06-01},
  abstract  = {This article presents the model that generated the runs submitted by the R2I LIS team to the VarDial2019 evaluation campaign, more particularly, to the binary classification by dialect sub-task of the Moldavian vs. Romanian Cross-dialect Topic identification (MRC) task. The team proposed a majority vote-based model, between five supervised machine learning models, trained on forty manually-crafted features. One of the three submitted runs was ranked second at the binary classification sub-task, with a performance of 0.7963, in terms of macro-F1 measure. The other two runs were ranked third and fourth, respectively.},
  keywords  = {Competition, Dialect Classification, Feature Engineering, Majority Vote},
  pubstate  = {published},
  tppubtype = {conference}
}

Bernard Espinasse; Sébastien Fournier; Adrian Chifu; Gaël Guibon; René Azcurra; Valentin Mace
On the Use of Dependencies in Relation Classification of Text with Deep Learning Conference
20th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing2019), CICLing2019 2019.
Abstract | Links | BibTeX | Tags: Compositional Word Embedding, Deep Learning, Dependencies, Relation Classification, Word Embedding
@comment{Conference paper, CICLing 2019. The conference acronym is stored only in the series field; booktitle carries the full conference name so the acronym is not printed twice.}
@conference{Espinasse2019,
  author    = {Bernard Espinasse and Sébastien Fournier and Adrian Chifu and Gaël Guibon and René Azcurra and Valentin Mace},
  title     = {On the Use of Dependencies in Relation Classification of Text with Deep Learning},
  booktitle = {20th International Conference on Computational Linguistics and Intelligent Text Processing},
  series    = {CICLing2019},
  year      = {2019},
  date      = {2019-04-07},
  url       = {https://hal.archives-ouvertes.fr/hal-02103919/document},
  urldate   = {2019-04-07},
  abstract  = {Deep Learning is more and more used in NLP tasks, such as in relation classification of texts. This paper assesses the impact of syntactic dependencies in this task at two levels. The first level concerns the generic Word Embedding (WE) as input of the classification model, the second level concerns the corpus whose relations have to be classified. In this paper, two classification models are studied, the first one is based on a CNN using a generic WE and does not take into account the dependencies of the corpus to be treated, and the second one is based on a compositional WE combining a generic WE with syntactical annotations of this corpus to classify. The impact of dependencies in relation classification is estimated using two different WE. The first one is essentially lexical and trained on the Wikipedia corpus in English, while the second one is also syntactical, trained on the same previously annotated corpus with syntactical dependencies. The two classification models are evaluated on the SemEval 2010 reference corpus using these two generic WE. The experiments show the importance of taking dependencies into account at different levels in the relation classification.},
  keywords  = {Compositional Word Embedding, Deep Learning, Dependencies, Relation Classification, Word Embedding},
  pubstate  = {published},
  tppubtype = {conference}
}
2018
Journal Articles

Adrian-Gabriel Chifu; Florentina Hristea
Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for IR? Journal Article
In: Open Computer Science, vol. 8, no. 1, pp. 218–227, 2018.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination
@comment{Journal article, Open Computer Science 8(1), 2018. The acronym IR is braced in the title so that sentence-casing bibliography styles keep it in capitals.}
@article{chifu2018feature,
  author    = {Adrian-Gabriel Chifu and Florentina Hristea},
  title     = {Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for {IR}?},
  journal   = {Open Computer Science},
  volume    = {8},
  number    = {1},
  pages     = {218--227},
  publisher = {Sciendo},
  year      = {2018},
  date      = {2018-12-01},
  url       = {https://www.degruyter.com/view/journals/comp/8/1/article-p218.xml},
  urldate   = {2018-12-01},
  abstract  = {Whether or not word sense disambiguation (WSD) can improve information retrieval (IR) results represents a topic that has been intensely debated over the years, with many inconclusive or contradictory conclusions. The most rarely used type of WSD for this task is the unsupervised one, although it has been proven to be beneficial at a large scale. Our study builds on existing research and tries to improve the most recent unsupervised method which is based on spectral clustering. It investigates the possible benefits of “helping” spectral clustering through feature selection when it performs sense discrimination for IR. Results obtained so far, involving large data collections, encourage us to point out the importance of feature selection even in the case of this advanced, state of the art clustering technique that is known for performing its own feature weighting. By suggesting an improvement of what we consider the most promising approach to usage of WSD in IR, and by commenting on its possible extensions, we state that WSD still holds a promise for IR and hope to stimulate continuation of this line of research, perhaps at an even more successful level.},
  keywords  = {Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences

Nathanaëla Andrianasolo; Adrian-Gabriel Chifu; Sébastien Fournier; Fidelia Ibekwe-SanJuan
Challenges to knowledge organization in the era of social media. The case of social controversies Conference
15th International ISKO Conference, ISKO2018 2018.
Abstract | Links | BibTeX | Tags: Controversy Mediation, Post-truth, Social Capital, Social Media, Societal Challenges to Knowledge Organization, Twitter
@comment{Conference paper, ISKO 2018. The title has two sentences; the first word of the second sentence is braced because sentence-casing styles would otherwise lowercase it.}
@conference{Andrianasolo2018,
  author    = {Nathanaëla Andrianasolo and Adrian-Gabriel Chifu and Sébastien Fournier and Fidelia Ibekwe-SanJuan},
  title     = {Challenges to knowledge organization in the era of social media. {The} case of social controversies},
  booktitle = {15th International ISKO Conference},
  series    = {ISKO2018},
  year      = {2018},
  date      = {2018-07-09},
  url       = {https://hal.archives-ouvertes.fr/hal-01889180/document},
  urldate   = {2018-07-09},
  abstract  = {In this paper, we look at how social media, in particular Twitter, are used to trigger, propagate and regulate opinions, and social controversies. Social media platforms are displacing the mainstream media and traditional sources of knowledge by facilitating the propagation of ideologies and causes championed by different groups of people. This results in pressures being brought to bear on institutions in the real world which are forced to make hasty decisions based on social media campaigns. The new forms of activism and the public arena enabled by social media platforms have also facilitated the propagation of so-called “post-truth” and “alternative facts” that obfuscate the traditional processes of knowledge elaboration which took decades to arrive at. This poses serious challenges for Knowledge Organization systems (KOs) that the KO community needs to find ways to address.},
  keywords  = {Controversy Mediation, Post-truth, Social Capital, Social Media, Societal Challenges to Knowledge Organization, Twitter},
  pubstate  = {published},
  tppubtype = {conference}
}

Adrian-Gabriel Chifu; Léa Laporte; Josiane Mothe; Md Zia Ullah
Query performance prediction focused on summarized letor features Conference
The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR2018 2018.
Abstract | Links | BibTeX | Tags: Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction
@comment{Conference paper, SIGIR 2018. The ampersand in booktitle is escaped because a bare ampersand is a LaTeX alignment character; Letor is capitalised and braced in the title to match its spelling in the abstract and keywords.}
@conference{chifu2018query,
  author    = {Adrian-Gabriel Chifu and Léa Laporte and Josiane Mothe and Md Zia Ullah},
  title     = {Query performance prediction focused on summarized {Letor} features},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  series    = {SIGIR2018},
  pages     = {1177--1180},
  year      = {2018},
  date      = {2018-07-01},
  url       = {ftp://ftp.irit.fr/IRIT/SIG/2018_SIGIR_CLMU.pdf},
  urldate   = {2018-01-01},
  abstract  = {Query performance prediction (QPP) aims at automatically estimating the information retrieval system effectiveness for any user's query. Previous work has investigated several types of pre- and post-retrieval query performance predictors; the latter has been shown to be more effective. In this paper we investigate the use of features that were initially defined for learning to rank in the task of QPP. While these features have been shown to be useful for learning to rank documents, they have never been studied as query performance predictors. We developed more than 350 variants of them based on summary functions. Conducting experiments on four TREC standard collections, we found that Letor-based features appear to be better QPP than predictors from the literature. Moreover, we show that combining the best Letor features outperforms the state of the art query performance predictors. This is the first study that considers such an amount and variety of Letor features for QPP and that demonstrates they are appropriate for this task.},
  keywords  = {Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}

Ismail Badache; Sébastien Fournier; Adrian-Gabriel Chifu
Predicting Contradiction Intensity: Low, Strong or Very Strong? Conference
The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR2018 2018.
Abstract | Links | BibTeX | Tags: Aspect, Contradiction Intensity, Feature Evaluation, Sentiment
@comment{Conference paper, SIGIR 2018. The ampersand in booktitle is escaped because a bare ampersand is a LaTeX alignment character. The document link belongs in the url field only, not in the abstract text.}
@conference{badache2018predicting,
  author    = {Ismail Badache and Sébastien Fournier and Adrian-Gabriel Chifu},
  title     = {Predicting Contradiction Intensity: Low, Strong or Very Strong?},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  series    = {SIGIR2018},
  pages     = {1125--1128},
  year      = {2018},
  date      = {2018-07-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01796060/document},
  urldate   = {2018-01-01},
  abstract  = {Reviews on web resources (e.g. courses, movies) become increasingly exploited in text analysis tasks (e.g. opinion detection, controversy detection). This paper investigates contradiction intensity in reviews exploiting different features such as variation of ratings and variation of polarities around specific entities (e.g. aspects, topics). Firstly, aspects are identified according to the distributions of the emotional terms in the vicinity of the most frequent nouns in the reviews collection. Secondly, the polarity of each review segment containing an aspect is estimated. Only resources containing these aspects with opposite polarities are considered. Finally, some features are evaluated, using feature selection algorithms, to determine their impact on the effectiveness of contradiction intensity detection. The selected features are used to learn some state-of-the-art learning approaches. The experiments are conducted on the Massive Open Online Courses data set containing 2244 courses and their 73,873 reviews, collected from coursera.org. Results showed that variation of ratings, variation of polarities, and reviews quantity are the best predictors of contradiction intensity. Also, J48 was the most effective learning approach for this type of classification.},
  keywords  = {Aspect, Contradiction Intensity, Feature Evaluation, Sentiment},
  pubstate  = {published},
  tppubtype = {conference}
}

Ismaïl Badache; Sébastien Fournier; Adrian Chifu
Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ? Conference
29es journées francophones d'Ingénierie des Connaissances, IC2018 Nancy, France, 2018, (2nd Best Paper).
Abstract | Links | BibTeX | Tags: Analyse de sentiments, Détection d'aspects, Evaluation des critères, Intensité de contradiction
@comment{Conference paper, IC 2018 (Nancy, France); the note field records "2nd Best Paper". The page range uses a double hyphen so it is typeset as an en dash.}
@conference{Badache2018,
  author    = {Ismaïl Badache and Sébastien Fournier and Adrian Chifu},
  title     = {Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ?},
  booktitle = {29es journées francophones d'Ingénierie des Connaissances},
  series    = {IC2018},
  pages     = {55--69},
  address   = {Nancy, France},
  year      = {2018},
  date      = {2018-07-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01839546/document},
  urldate   = {2018-07-01},
  abstract  = {Les commentaires sur des ressources Web (ex. : des cours, des films) deviennent de plus en plus exploitées dans des tâches d’analyse de texte (ex. détection d’opinion, détection de controverses). Cet article étudie l’intensité de contradiction dans les commentaires en exploitant différents critères tels que la variation des notations et la variation des polarités autour d’entités spécifiques (ex. aspects, sujets). Premièrement, les aspects sont identifiés en fonction des distributions des termes émotionnels à proximité des noms les plus fréquents dans la collection des commentaires. Deuxièmement, la polarité est estimée pour chaque segment de commentaire contenant un aspect. Seules les ressources ayant des commentaires contenant des aspects avec des polarités opposées sont prises en compte. Enfin, les critères sont évalués, en utilisant des algorithmes de sélection d’attributs, pour déterminer leur impact sur l’efficacité de la détection de l’intensité des contradictions. Les critères sélectionnés sont ensuite introduits dans des modèles d’apprentissage pour prédire l’intensité de contradiction. L’évaluation expérimentale est menée sur une collection contenant 2244 cours et leurs 73873 commentaires, collectés à partir de coursera.org. Les résultats montrent que la variation des notations, la variation des polarités et la quantité de commentaires sont les meilleurs prédicteurs de l’intensité de contradiction. En outre, J48 est l’approche d’apprentissage la plus efficace pour cette tâche.},
  note      = {2nd Best Paper},
  keywords  = {Analyse de sentiments, Détection d'aspects, Evaluation des critères, Intensité de contradiction},
  pubstate  = {published},
  tppubtype = {conference}
}

Ismail Badache; Sébastien Fournier; Adrian-Gabriel Chifu
Contradiction in Reviews: is it Strong or Low? Conference
40th European Conference on Information Retrieval, ECIR 2018-BroDyn: Workshop on Analysis of Broad Dynamic Topics over Social Media, ECIR2018 - BroDyn 2018.
Abstract | Links | BibTeX | Tags: Aspect Detection, Contradiction Intensity, Sentiment Analysis
@comment{Workshop paper, BroDyn workshop at ECIR 2018; PDF hosted on ceur-ws.org.}
@conference{badache2018contradiction,
  author    = {Ismail Badache and Sébastien Fournier and Adrian-Gabriel Chifu},
  title     = {Contradiction in Reviews: is it Strong or Low?},
  booktitle = {40th European Conference on Information Retrieval, ECIR 2018-BroDyn: Workshop on Analysis of Broad Dynamic Topics over Social Media},
  series    = {ECIR2018 - BroDyn},
  year      = {2018},
  date      = {2018-03-01},
  url       = {http://ceur-ws.org/Vol-2078/paper1.pdf},
  urldate   = {2018-01-01},
  abstract  = {Analysis of opinions (reviews) generated by users becomes increasingly exploited by a variety of applications. It allows to follow the evolution of the opinions or to carry out investigations on web resource (e.g. courses, movies, products). The detection of contradictory opinions is an important task to evaluate the latter. This paper focuses on the problem of detecting and estimating contradiction intensity based on the sentiment analysis around specific aspects of a resource. Firstly, certain aspects are identified, according to the distributions of the emotional terms in the vicinity of the most frequent names in the whole of the reviews. Secondly, the polarity of each review segment containing an aspect is estimated using the state-of-the-art approach SentiNeuron. Then, only the resources containing these aspects with opposite polarities (positive, negative) are considered. Thirdly, a measure of the intensity of the contradiction is introduced. It is based on the joint dispersion of the polarity and the rating of the reviews containing the aspects within each resource. The evaluation of the proposed approach is conducted on the Massive Open Online Courses collection containing 2244 courses and their 73,873 reviews, collected from Coursera. The results revealed the effectiveness of the proposed approach to detect and quantify contradictions.},
  keywords  = {Aspect Detection, Contradiction Intensity, Sentiment Analysis},
  pubstate  = {published},
  tppubtype = {conference}
}