2019
Conferences
Josiane Mothe; Léa Laporte; Adrian-Gabriel Chifu
Predicting Query Difficulty in IR: Impact of Difficulty Definition Conference
2019 11th International Conference on Knowledge and Systems Engineering, KSE2019 IEEE 2019.
Abstract | Links | BibTeX | Tags: Information Retrieval, Query Difficulty Prediction, Query Features
@conference{mothe2019predicting,
  title        = {Predicting Query Difficulty in {IR}: Impact of Difficulty Definition},
  author       = {Mothe, Josiane and Laporte, Léa and Chifu, Adrian-Gabriel},
  url          = {https://www.irit.fr/publis/SIG/2019_KSE_MLC.pdf},
  year         = {2019},
  date         = {2019-10-24},
  urldate      = {2019-01-01},
  booktitle    = {2019 11th International Conference on Knowledge and Systems Engineering},
  pages        = {1--6},
  organization = {IEEE},
  series       = {KSE2019},
  abstract     = {While it exists information on about any topic on the web, we know from information retrieval (IR) evaluation programs that search systems fail to answer to some queries in an effective manner. System failure is associated to query difficulty in the IR literature. However, there is no clear definition of query difficulty. This paper investigates several ways of defining query difficulty and analyses the impact of these definitions on query difficulty prediction results. Our experiments show that the most stable definition across collections is a threshold-based definition of query difficulty classes.},
  keywords     = {Information Retrieval, Query Difficulty Prediction, Query Features},
  pubstate     = {published},
  tppubtype    = {conference}
}
2018
Conferences
Adrian-Gabriel Chifu; Léa Laporte; Josiane Mothe; Md Zia Ullah
Query performance prediction focused on summarized letor features Conference
The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, SIGIR2018 2018.
Abstract | Links | BibTeX | Tags: Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction
@conference{chifu2018query,
  title     = {Query Performance Prediction Focused on Summarized {Letor} Features},
  author    = {Chifu, Adrian-Gabriel and Laporte, Léa and Mothe, Josiane and Ullah, Md Zia},
  url       = {ftp://ftp.irit.fr/IRIT/SIG/2018_SIGIR_CLMU.pdf},
  year      = {2018},
  date      = {2018-07-01},
  urldate   = {2018-01-01},
  booktitle = {The 41st International {ACM} {SIGIR} Conference on Research \& Development in Information Retrieval},
  pages     = {1177--1180},
  series    = {SIGIR2018},
  abstract  = {Query performance prediction (QPP) aims at automatically estimating the information retrieval system effectiveness for any user's query. Previous work has investigated several types of pre- and post-retrieval query performance predictors; the latter has been shown to be more effective. In this paper we investigate the use of features that were initially defined for learning to rank in the task of QPP. While these features have been shown to be useful for learning to rank documents, they have never been studied as query performance predictors. We developed more than 350 variants of them based on summary functions. Conducting experiments on four TREC standard collections, we found that Letor-based features appear to be better QPP than predictors from the literature. Moreover, we show that combining the best Letor features outperforms the state of the art query performance predictors. This is the first study that considers such an amount and variety of Letor features for QPP and that demonstrates they are appropriate for this task.},
  keywords  = {Letor Features, Post Retrieval Features, Query Difficulty Prediction, Query Features, Query Performance Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}
2015
Conferences
Adrian Chifu; Léa Laporte; Josiane Mothe
La prédiction efficace de la difficulté des requêtes : une tâche impossible ? Conference
Conférence francophone en Recherche d'Information et Applications (CORIA 2015), Paris, 2015.
Abstract | Links | BibTeX | Tags: Data Mining, Evaluation, Information Retrieval, Query Difficulty Prediction
@conference{ChifuCORIA2015,
  title     = {La prédiction efficace de la difficulté des requêtes : une tâche impossible ?},
  author    = {Chifu, Adrian and Laporte, Léa and Mothe, Josiane},
  url       = {https://oatao.univ-toulouse.fr/15263/1/chifu_15263.pdf},
  year      = {2015},
  date      = {2015-03-18},
  booktitle = {Conférence francophone en Recherche d'Information et Applications (CORIA 2015), Paris},
  abstract  = {Résumé :
Les moteurs de recherche d’information (RI) retrouvent des réponses quelle que soit la requête, mais certaines requêtes sont difficiles (le système n’obtient pas de bonne performance en termes de mesure de RI). Pour les requêtes difficiles, des traitements ad-hoc doivent être appliqués. Prédire qu’une requête est difficile est donc crucial et différents prédicteurs ont été proposés. Dans cet article nous étudions la variété de l’information captée par les prédicteurs existants et donc leur non redondance. Par ailleurs, nous montrons que les corrélations entre les prédicteurs et les performance des systèmes donnent peu d’espoir sur la capacité de ces prédicteurs à être réellement efficaces. Enfin, nous étudions la capacité des prédicteurs à prédire les classes de difficulté des requêtes en nous appuyant sur une variété de méthodes exploratoires et d’apprentissage. Nous montrons que malgré les (faibles) corrélations observées avec les mesures de performance, les prédicteurs actuels conduisent à des performances de prédiction variables et sont donc difficilement utilisables dans une application concrète de RI.
Abstract:
Search engines found answers whatever the user query is, but some queries are more difficult than others for the system. For difficult queries, adhoc treatments must be applied. Predicting query difficulty is crucial and different predictors have been proposed. In this paper, we revisit these predictors. First we check the non statistical redundancy of predictors. Then, we show that the correlation between the values of predictors and system performance gives little hope on the ability of these predictors to be effective. Finally, we study the ability of predictors to predict the classes of difficulty by relying on a variety of exploratory and learning methods. We show that despite the (low) correlation with performance measures, current predictors are not robust enough to be used in practical IR applications.},
  keywords  = {Data Mining, Evaluation, Information Retrieval, Query Difficulty Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}
Les moteurs de recherche d’information (RI) retrouvent des réponses quelle que soit la requête, mais certaines requêtes sont difficiles (le système n’obtient pas de bonne performance en termes de mesure de RI). Pour les requêtes difficiles, des traitements ad-hoc doivent être appliqués. Prédire qu’une requête est difficile est donc crucial et différents prédicteurs ont été proposés. Dans cet article nous étudions la variété de l’information captée par les prédicteurs existants et donc leur non redondance. Par ailleurs, nous montrons que les corrélations entre les prédicteurs et les performance des systèmes donnent peu d’espoir sur la capacité de ces prédicteurs à être réellement efficaces. Enfin, nous étudions la capacité des prédicteurs à prédire les classes de difficulté des requêtes en nous appuyant sur une variété de méthodes exploratoires et d’apprentissage. Nous montrons que malgré les (faibles) corrélations observées avec les mesures de performance, les prédicteurs actuels conduisent à des performances de prédiction variables et sont donc difficilement utilisables dans une application concrète de RI.
Abstract:
Search engines found answers whatever the user query is, but some queries are more difficult than others for the system. For difficult queries, adhoc treatments must be applied. Predicting query difficulty is crucial and different predictors have been proposed. In this paper, we revisit these predictors. First we check the non statistical redundancy of predictors. Then, we show that the correlation between the values of predictors and system performance gives little hope on the ability of these predictors to be effective. Finally, we study the ability of predictors to predict the classes of difficulty by relying on a variety of exploratory and learning methods. We show that despite the (low) correlation with performance measures, current predictors are not robust enough to be used in practical IR applications.