2015
Journal Articles
Julie Ayter; Adrian Chifu; Sébastien Déjean; Cecile Desclaux; Josiane Mothe
Statistical analysis to establish the importance of information retrieval parameters Journal Article
In: Journal of Universal Computer Science, vol. 21, no. 13, pp. 1767 ff., 2015.
Abstract | Links | BibTeX | Tags: Information Retrieval, IR System Parameter, Query Clustering, Query Difficulty, Random Forest
@comment{NOTE(review): cleaned web-export artifacts in this entry: removed the
  misused "key" field (it duplicated "keywords" with a hyphenation break),
  repaired PDF-extraction hyphenation in the abstract, and normalized author
  names to "Last, First". The export had pages = {pp--1767}; the start page is
  1767 but the end page was lost — verify against JUCS 21(13) and complete the
  range.}
@article{ayter2015statistical,
  title     = {Statistical analysis to establish the importance of information retrieval parameters},
  author    = {Ayter, Julie and Chifu, Adrian and Déjean, Sébastien and Desclaux, Cecile and Mothe, Josiane},
  url       = {https://hal.archives-ouvertes.fr/hal-01592043/document},
  year      = {2015},
  date      = {2015-12-01},
  urldate   = {2015-12-01},
  journal   = {Journal of Universal Computer Science},
  volume    = {21},
  number    = {13},
  pages     = {1767},
  abstract  = {Search engines are based on models to index documents, match queries and documents and rank documents. Research in Information Retrieval (IR) aims at defining these models and their parameters in order to optimize the results. Using benchmark collections, it has been shown that there is not a best system configuration that works for any query, but rather that performance varies from one query to another. It would be interesting if a meta-system could decide which system configuration should process a new query by learning from the context of previous queries. This paper reports a deep analysis considering more than 80,000 search engine configurations applied to 100 queries and the corresponding performance. The goal of the analysis is to identify which configuration responds best to a certain type of query. We considered two approaches to define query types: one is post-evaluation, based on query clustering according to the performance measured with Average Precision, while the second approach is pre-evaluation, using query features (including query difficulty predictors) to cluster queries. Globally, we identified two parameters that should be optimized: retrieving model and TrecQueryTags process. One could expect such results as these two parameters are major components of IR process. However our work results in two main conclusions: 1/ based on post-evaluation approach, we found that retrieving model is the most influential parameter for easy queries while TrecQueryTags process is for hard queries; 2/ for pre-evaluation, current query features do not allow to cluster queries to identify differences in the influential parameters.},
  keywords  = {Information Retrieval, IR System Parameter, Query Clustering, Query Difficulty, Random Forest},
  pubstate  = {published},
  tppubtype = {article}
}
2014
Conferences
Julie Ayter; Cecile Desclaux; Adrian Chifu; Josiane Mothe; Sébastien Déjean
Performance Analysis of Information Retrieval Systems Conference
Spanish Conference on Information Retrieval (CERI2014), Coruna, 2014.
Abstract | Links | BibTeX | Tags: Adaptive Information Retrieval, Classification, Information Retrieval, Optimization, Query Difficulty, Random Forest
@comment{NOTE(review): replaced the placeholder citation key "nokey" (non-unique,
  collides with any other placeholder entry) with the scheme-consistent key
  ayter2014performance — update any \cite{nokey} references. Normalized the
  legacy alias @conference to @inproceedings, dropped the year duplicated inside
  booktitle (year/date fields already carry it), and normalized author names to
  "Last, First".}
@inproceedings{ayter2014performance,
  title     = {Performance Analysis of Information Retrieval Systems},
  author    = {Ayter, Julie and Desclaux, Cecile and Chifu, Adrian and Mothe, Josiane and Déjean, Sébastien},
  url       = {https://hal.archives-ouvertes.fr/hal-01119086/document},
  year      = {2014},
  date      = {2014-06-01},
  urldate   = {2014-06-01},
  booktitle = {Spanish Conference on Information Retrieval (CERI2014), Coruna},
  abstract  = {It has been shown that there is not a best information retrieval system configuration which would work for any query, but rather that performance can vary from one query to another. It would be interesting if a meta-system could decide which system should process a new query by learning from the context of previously submitted queries. This paper reports a deep analysis considering more than 80,000 search engine configurations applied to 100 queries and the corresponding performance. The goal of the analysis is to identify which search engine configuration responds best to a certain type of query. We considered two approaches to define query types: one is based on query clustering according to the query performance (their difficulty), while the other approach uses various query features (including query difficulty predictors) to cluster queries. We identified two parameters that should be optimized first. An important outcome is that we could not obtain strong conclusive results; considering the large number of systems and methods we used, this result could lead to the conclusion that current query features does not fit the optimizing problem.},
  keywords  = {Adaptive Information Retrieval, Classification, Information Retrieval, Optimization, Query Difficulty, Random Forest},
  pubstate  = {published},
  tppubtype = {conference}
}
2013
Conferences
Adrian-Gabriel Chifu
Prédire la Difficulté des Requêtes: la Combinaison de Mesures Statistiques et Sémantiques Conference
COnférence francophone en Recherche d'Information et Applications, CORIA 2013, 2013.
Abstract | Links | BibTeX | Tags: Combined Predictors, Information Retrieval, Measure Correlation, Query Ambiguity, Query Difficulty, Query Performance Prediction
@comment{NOTE(review): normalized the legacy alias @conference to @inproceedings,
  repaired PDF-extraction hyphenation in the abstract ("correla- tion") and the
  "ahdoc" typo for "adhoc", and normalized the author name to "Last, First".
  The export had pages = {pp--191}; the start page is 191 but the end page was
  lost — verify against the CORIA 2013 proceedings and complete the range.}
@inproceedings{chifu2013predire,
  title     = {Prédire la Difficulté des Requêtes: la Combinaison de Mesures Statistiques et Sémantiques},
  author    = {Chifu, Adrian-Gabriel},
  url       = {https://hal.archives-ouvertes.fr/hal-01145833/document},
  year      = {2013},
  date      = {2013-04-03},
  urldate   = {2013-01-01},
  booktitle = {COnférence francophone en Recherche d'Information et Applications},
  pages     = {191},
  series    = {CORIA2013},
  abstract  = {The performance of an Information Retrieval System (IRS) is closely related to the query. The queries that lead to retrieval failure are referenced in the literature as "difficult queries". This study aims at analysing, adapting and combining several difficulty predictors. The evaluation of the prediction is based on the correlation between the predicted difficulty and the IRS performance. As predictors, we have considered an ambiguity predictor, the IDF measure and a score distribution measure. We show that combining the proposed predictors, produce good results. The evaluation framework consists in the TREC7 and TREC8 adhoc collections.},
  keywords  = {Combined Predictors, Information Retrieval, Measure Correlation, Query Ambiguity, Query Difficulty, Query Performance Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}