2020
Conferences
Francisco Rodrigues; Rinaldo Lima; William Domingues; Robson Fidalgo; Adrian Chifu; Bernard Espinasse; Sébastien Fournier
DeepNLPF: A Framework for Integrating Third Party NLP Tools Conference
Proceedings of the 12th Language Resources and Evaluation Conference, LREC2020 2020.
Abstract | Links | BibTeX | Tags: Framework, Natural Language Processing, NLP Tools Integration
@conference{rodrigues2020deepnlpf,
  author    = {Francisco Rodrigues and Rinaldo Lima and William Domingues and Robson Fidalgo and Adrian Chifu and Bernard Espinasse and Sébastien Fournier},
  title     = {{DeepNLPF}: A Framework for Integrating Third Party {NLP} Tools},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  series    = {LREC2020},
  pages     = {7244--7251},
  year      = {2020},
  date      = {2020-05-11},
  url       = {http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.895.pdf},
  urldate   = {2020-01-01},
  abstract  = {Natural Language Processing (NLP) of textual data is usually broken down into a sequence of several subtasks, where the output of one the subtasks becomes the input to the following one, which constitutes an NLP pipeline. Many third-party NLP tools are currently available, each performing distinct NLP subtasks. However, it is difficult to integrate several NLP toolkits into a pipeline due to many problems, including different input/output representations or formats, distinct programming languages, and tokenization issues. This paper presents DeepNLPF, a framework that enables easy integration of third-party NLP tools, allowing the user to preprocess natural language texts at lexical, syntactic, and semantic levels. The proposed framework also provides an API for complete pipeline customization including the definition of input/output formats, integration plugin management, transparent multiprocessing execution strategies, corpus-level statistics, and database persistence. Furthermore, the DeepNLPF user-friendly GUI allows its use even by a non-expert NLP user. We conducted runtime performance analysis showing that DeepNLPF not only easily integrates existent NLP toolkits but also reduces significant runtime processing compared to executing the same NLP pipeline in a sequential manner.},
  keywords  = {Framework, Natural Language Processing, NLP Tools Integration},
  pubstate  = {published},
  tppubtype = {conference}
}
2019
Journal Articles
Ismaïl Badache; Sébastien Fournier; Adrian Chifu
Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ? Journal Article
In: Le Bulletin de l'Association Française pour l'Intelligence Artificielle (AFIA 2019), 2019.
Abstract | Links | BibTeX | Tags: Aspect Detection, Contradiction Intensity, Criteria Evaluation, Sentiment Analysis
@article{Badache2019AFIA,
  author    = {Ismaïl Badache and Sébastien Fournier and Adrian Chifu},
  title     = {Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ?},
  journal   = {Le Bulletin de l'Association Française pour l'Intelligence Artificielle (AFIA 2019)},
  year      = {2019},
  date      = {2019-12-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01872267/document},
  abstract  = {Reviews on web resources (e.g. courses, movies) become increasingly exploited in text analysis tasks (e.g. opinion detection, controversy detection). This paper investigates contradiction intensity in reviews exploiting different features such as variation of ratings and variation of polarities around specific entities (e.g. aspects, topics). Firstly, aspects are identified according to the distributions of the emotional terms in the vicinity of the most frequent nouns in the reviews collection. Secondly, the polarity of each review segment containing an aspect is estimated. Only resources containing these aspects with opposite polarities are considered. Finally, some features are evaluated, using feature selection algorithms, to determine their impact on the effectiveness of contradiction intensity detection. The selected features are used to learn some state-of-the-art learning approaches. The experiments are conducted on the Massive Open Online Courses data set containing 2244 courses and their 73,873 reviews, collected from coursera.org. Results showed that variation of ratings, variation of polarities, and reviews quantity are the best predictors of contradiction intensity. Also, J48 was the most effective learning approach for this type of classification.},
  keywords  = {Aspect Detection, Contradiction Intensity, Criteria Evaluation, Sentiment Analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences
Josiane Mothe; Léa Laporte; Adrian-Gabriel Chifu
Predicting Query Difficulty in IR: Impact of Difficulty Definition Conference
2019 11th International Conference on Knowledge and Systems Engineering, KSE2019 IEEE 2019.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Difficulty Prediction, Query Features
@conference{mothe2019predicting,
  author       = {Josiane Mothe and Léa Laporte and Adrian-Gabriel Chifu},
  title        = {Predicting Query Difficulty in {IR}: Impact of Difficulty Definition},
  booktitle    = {2019 11th International Conference on Knowledge and Systems Engineering},
  series       = {KSE2019},
  pages        = {1--6},
  organization = {IEEE},
  year         = {2019},
  date         = {2019-10-24},
  url          = {https://www.irit.fr/publis/SIG/2019_KSE_MLC.pdf},
  urldate      = {2019-01-01},
  abstract     = {While it exists information on about any topic on the web, we know from information retrieval (IR) evaluation programs that search systems fail to answer to some queries in an effective manner. System failure is associated to query difficulty in the IR literature. However, there is no clear definition of query difficulty. This paper investigates several ways of defining query difficulty and analyses the impact of these definitions on query difficulty prediction results. Our experiments show that the most stable definition across collections is a threshold-based definition of query difficulty classes.},
  keywords     = {Information Retrieval, Query Difficulty Prediction, Query Features},
  pubstate     = {published},
  tppubtype    = {conference}
}
Adrian-Gabriel Chifu
The R2I_LIS Team Proposes Majority Vote for VarDial’s MRC Task Conference
Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects, VARDIAL2019 2019.
Abstract | Links | BibTeX | Tags: Competition, Dialect Classification, Feature Engineering, Majority Vote
@conference{chifu2019r2i_lis,
  author    = {Adrian-Gabriel Chifu},
  title     = {The {R2I\_LIS} Team Proposes Majority Vote for {VarDial}’s {MRC} Task},
  booktitle = {Proceedings of the Sixth Workshop on NLP for Similar Languages, Varieties and Dialects},
  series    = {VARDIAL2019},
  pages     = {138--143},
  year      = {2019},
  date      = {2019-06-01},
  url       = {https://www.aclweb.org/anthology/W19-1414.pdf},
  urldate   = {2019-06-01},
  abstract  = {This article presents the model that generated the runs submitted by the R2I LIS team to the VarDial2019 evaluation campaign, more particularly, to the binary classification by dialect sub-task of the Moldavian vs. Romanian Cross-dialect Topic identification (MRC) task. The team proposed a majority vote-based model, between five supervised machine learning models, trained on forty manually-crafted features. One of the three submitted runs was ranked second at the binary classification sub-task, with a performance of 0.7963, in terms of macro-F1 measure. The other two runs were ranked third and fourth, respectively.},
  keywords  = {Competition, Dialect Classification, Feature Engineering, Majority Vote},
  pubstate  = {published},
  tppubtype = {conference}
}
Bernard Espinasse; Sébastien Fournier; Adrian Chifu; Gaël Guibon; René Azcurra; Valentin Mace
On the Use of Dependencies in Relation Classification of Text with Deep Learning Conference
20th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing2019), CICLing2019 2019.
Abstract | Links | BibTeX | Tags: Compositional Word Embedding, Deep Learning, Dependencies, Relation Classification, Word Embedding
@conference{Espinasse2019,
  author    = {Bernard Espinasse and Sébastien Fournier and Adrian Chifu and Gaël Guibon and René Azcurra and Valentin Mace},
  title     = {On the Use of Dependencies in Relation Classification of Text with Deep Learning},
  booktitle = {20th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing2019)},
  series    = {CICLing2019},
  year      = {2019},
  date      = {2019-04-07},
  url       = {https://hal.archives-ouvertes.fr/hal-02103919/document},
  urldate   = {2019-04-07},
  abstract  = {Deep Learning is more and more used in NLP tasks, such as in relation classification of texts. This paper assesses the impact of syntactic dependencies in this task at two levels. The first level concerns the generic Word Embedding (WE) as input of the classification model, the second level concerns the corpus whose relations have to be classified. In this paper, two classification models are studied, the first one is based on a CNN using a generic WE and does not take into account the dependencies of the corpus to be treated, and the second one is based on a compositional WE combining a generic WE with syntactical annotations of this corpus to classify. The impact of dependencies in relation classification is estimated using two different WE. The first one is essentially lexical and trained on the Wikipedia corpus in English, while the second one is also syntactical, trained on the same previously annotated corpus with syntactical dependencies. The two classification models are evaluated on the SemEval 2010 reference corpus using these two generic WE. The experiments show the importance of taking dependencies into account at different levels in the relation classification.},
  keywords  = {Compositional Word Embedding, Deep Learning, Dependencies, Relation Classification, Word Embedding},
  pubstate  = {published},
  tppubtype = {conference}
}
2018
Journal Articles
Adrian-Gabriel Chifu; Florentina Hristea
Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for IR? Journal Article
In: Open Computer Science, vol. 8, no. 1, pp. 218–227, 2018.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination
@article{chifu2018feature,
  author    = {Adrian-Gabriel Chifu and Florentina Hristea},
  title     = {Feature selection for spectral clustering: to help or not to help spectral clustering when performing sense discrimination for {IR}?},
  journal   = {Open Computer Science},
  volume    = {8},
  number    = {1},
  pages     = {218--227},
  publisher = {Sciendo},
  year      = {2018},
  date      = {2018-12-01},
  url       = {https://www.degruyter.com/view/journals/comp/8/1/article-p218.xml},
  urldate   = {2018-12-01},
  abstract  = {Whether or not word sense disambiguation (WSD) can improve information retrieval (IR) results represents a topic that has been intensely debated over the years, with many inconclusive or contradictory conclusions. The most rarely used type of WSD for this task is the unsupervised one, although it has been proven to be beneficial at a large scale. Our study builds on existing research and tries to improve the most recent unsupervised method which is based on spectral clustering. It investigates the possible benefits of “helping” spectral clustering through feature selection when it performs sense discrimination for IR. Results obtained so far, involving large data collections, encourage us to point out the importance of feature selection even in the case of this advanced, state of the art clustering technique that is known for performing its own feature weighting. By suggesting an improvement of what we consider the most promising approach to usage of WSD in IR, and by commenting on its possible extensions, we state that WSD still holds a promise for IR and hope to stimulate continuation of this line of research, perhaps at an even more successful level.},
  keywords  = {Information Retrieval, Query Disambiguation, Spectral Clustering, Word Sense Discrimination},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences
Nathanaëla Andrianasolo; Adrian-Gabriel Chifu; Sébastien Fournier; Fidelia Ibekwe-SanJuan
Challenges to knowledge organization in the era of social media. The case of social controversies Conference
15th International ISKO Conference, ISKO2018 2018.
Abstract | Links | BibTeX | Tags: Controversy Mediation, Post-truth, Social Capital, Social Media, Societal Challenges to Knowledge Organization, Twitter
@conference{Andrianasolo2018,
  author    = {Nathanaëla Andrianasolo and Adrian-Gabriel Chifu and Sébastien Fournier and Fidelia Ibekwe-SanJuan},
  title     = {Challenges to knowledge organization in the era of social media. The case of social controversies},
  booktitle = {15th International ISKO Conference},
  series    = {ISKO2018},
  year      = {2018},
  date      = {2018-07-09},
  url       = {https://hal.archives-ouvertes.fr/hal-01889180/document},
  urldate   = {2018-07-09},
  abstract  = {In this paper, we look at how social media, in particular Twitter, are used to trigger, propagate and regulate opinions, and social controversies. Social media platforms are displacing the mainstream media and traditional sources of knowledge by facilitating the propagation of ideologies and causes championed by different groups of people. This results in pressures being brought to bear on institutions in the real world which are forced to make hasty decisions based on social media campaigns. The new forms of activism and the public arena enabled by social media platforms have also facilitated the propagation of so-called “post-truth” and “alternative facts” that obfuscate the traditional processes of knowledge elaboration which took decades to arrive at. This poses serious challenges for Knowledge Organization systems (KOs) that the KO community needs to find ways to address.},
  keywords  = {Controversy Mediation, Post-truth, Social Capital, Social Media, Societal Challenges to Knowledge Organization, Twitter},
  pubstate  = {published},
  tppubtype = {conference}
}
Adrian-Gabriel Chifu; Léa Laporte; Josiane Mothe; Md Zia Ullah
Query performance prediction focused on summarized letor features Conference
The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR2018 2018.
Abstract | Links | BibTeX | Tags: Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction
@conference{chifu2018query,
  author    = {Adrian-Gabriel Chifu and Léa Laporte and Josiane Mothe and Md Zia Ullah},
  title     = {Query performance prediction focused on summarized letor features},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  series    = {SIGIR2018},
  pages     = {1177--1180},
  year      = {2018},
  date      = {2018-07-01},
  url       = {ftp://ftp.irit.fr/IRIT/SIG/2018_SIGIR_CLMU.pdf},
  urldate   = {2018-01-01},
  abstract  = {Query performance prediction (QPP) aims at automatically estimating the information retrieval system effectiveness for any user's query. Previous work has investigated several types of pre- and post-retrieval query performance predictors; the latter has been shown to be more effective. In this paper we investigate the use of features that were initially defined for learning to rank in the task of QPP. While these features have been shown to be useful for learning to rank documents, they have never been studied as query performance predictors. We developed more than 350 variants of them based on summary functions. Conducting experiments on four TREC standard collections, we found that Letor-based features appear to be better QPP than predictors from the literature. Moreover, we show that combining the best Letor features outperforms the state of the art query performance predictors. This is the first study that considers such an amount and variety of Letor features for QPP and that demonstrates they are appropriate for this task.},
  keywords  = {Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}
Ismail Badache; Sébastien Fournier; Adrian-Gabriel Chifu
Predicting Contradiction Intensity: Low, Strong or Very Strong? Conference
The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR2018 2018.
Abstract | Links | BibTeX | Tags: Aspect, Contradiction Intensity, Feature Evaluation, Sentiment
@conference{badache2018predicting,
  author    = {Ismail Badache and Sébastien Fournier and Adrian-Gabriel Chifu},
  title     = {Predicting Contradiction Intensity: Low, Strong or Very Strong?},
  booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval},
  series    = {SIGIR2018},
  pages     = {1125--1128},
  year      = {2018},
  date      = {2018-07-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01796060/document},
  urldate   = {2018-01-01},
  abstract  = {Reviews on web resources (e.g. courses, movies) become increasingly exploited in text analysis tasks (e.g. opinion detection, controversy detection). This paper investigates contradiction intensity in reviews exploiting different features such as variation of ratings and variation of polarities around specific entities (e.g. aspects, topics). Firstly, aspects are identified according to the distributions of the emotional terms in the vicinity of the most frequent nouns in the reviews collection. Secondly, the polarity of each review segment containing an aspect is estimated. Only resources containing these aspects with opposite polarities are considered. Finally, some features are evaluated, using feature selection algorithms, to determine their impact on the effectiveness of contradiction intensity detection. The selected features are used to learn some state-of-the-art learning approaches. The experiments are conducted on the Massive Open Online Courses data set containing 2244 courses and their 73,873 reviews, collected from coursera.org. Results showed that variation of ratings, variation of polarities, and reviews quantity are the best predictors of contradiction intensity. Also, J48 was the most effective learning approach for this type of classification.},
  keywords  = {Aspect, Contradiction Intensity, Feature Evaluation, Sentiment},
  pubstate  = {published},
  tppubtype = {conference}
}
Ismaïl Badache; Sébastien Fournier; Adrian Chifu
Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ? Conference
29es journées francophones d'Ingénierie des Connaissances, IC2018 Nancy, France, 2018, (2nd Best Paper).
Abstract | Links | BibTeX | Tags: Analyse de sentiments, Détection d'aspects, Evaluation des critères, Intensité de contradiction
@conference{Badache2018,
  author    = {Ismaïl Badache and Sébastien Fournier and Adrian Chifu},
  title     = {Prédire l'intensité de contradiction dans les commentaires : faible, forte ou très forte ?},
  booktitle = {29es journées francophones d'Ingénierie des Connaissances},
  series    = {IC2018},
  pages     = {55--69},
  address   = {Nancy, France},
  year      = {2018},
  date      = {2018-07-01},
  url       = {https://hal.archives-ouvertes.fr/hal-01839546/document},
  urldate   = {2018-07-01},
  abstract  = {Les commentaires sur des ressources Web (ex. : des cours, des films) deviennent de plus en plus exploitées dans des tâches d’analyse de texte (ex. détection d’opinion, détection de controverses). Cet article étudie l’intensité de contradiction dans les commentaires en exploitant différents critères tels que la variation des notations et la variation des polarités autour d’entités spécifiques (ex. aspects, sujets). Premièrement, les aspects sont identifiés en fonction des distributions des termes émotionnels à proximité des noms les plus fréquents dans la collection des commentaires. Deuxièmement, la polarité est estimée pour chaque segment de commentaire contenant un aspect. Seules les ressources ayant des commentaires contenant des aspects avec des polarités opposées sont prises en compte. Enfin, les critères sont évalués, en utilisant des algorithmes de sélection d’attributs, pour déterminer leur impact sur l’efficacité de la détection de l’intensité des contradictions. Les critères sélectionnés sont ensuite introduits dans des modèles d’apprentissage pour prédire l’intensité de contradiction. L’évaluation expérimentale est menée sur une collection contenant 2244 cours et leurs 73873 commentaires, collectés à partir de coursera.org. Les résultats montrent que la variation des notations, la variation des polarités et la quantité de commentaires sont les meilleurs prédicteurs de l’intensité de contradiction. En outre, J48 est l’approche d’apprentissage la plus efficace pour cette tâche.},
  note      = {2nd Best Paper},
  keywords  = {Analyse de sentiments, Détection d'aspects, Evaluation des critères, Intensité de contradiction},
  pubstate  = {published},
  tppubtype = {conference}
}