2016
Conferences
Adrian Chifu; Serge Molina; Josiane Mothe
MyBestQuery: A serious game to collect manual query reformulation Conference
Colloque Veille Stratégique Scientifique et Technologique (VSST 2016), Rabat (Morocco), 2016.
Abstract | Links | BibTeX | Tags: Human Annotation, Information Retrieval, Query Reformulation, Serious Game
@conference{ChifuVSST2016,
  title     = {{MyBestQuery}: A serious game to collect manual query reformulation},
  author    = {Chifu, Adrian and Molina, Serge and Mothe, Josiane},
  url       = {https://oatao.univ-toulouse.fr/18853/1/2016_VSST_CMM.pdf},
  year      = {2016},
  date      = {2016-10-18},
  urldate   = {2016-10-18},
  booktitle = {Colloque Veille Stratégique Scientifique et Technologique (VSST 2016)},
  address   = {Rabat, Morocco},
  abstract  = {This paper presents MyBestQuery, a serious game designed to collect query reformulations from players. Query reformulation is a hot topic in information retrieval and covers many aspects. One of them is query reformulation analysis which is based on users’ session. It can be used to understand user's intent or to measure his satisfaction with regards to the results he obtained when querying the search engine. Automatic query reformulation is another aspect of query reformulation. It automatically expands the initial user’s query in order to improve the quality of the retrieved document set. This mechanism relies on document analysis but could also benefit from manually reformulated query analysis. Web search engines collect millions of search sessions and possible query reformulations. As academics, this information is hardly accessible for us. MyBestQuery is designed as a serious game in order to collect various possible reformulation users suggest. The more long-term objective of this work is to analyse the humanly produced query reformulation in order to both analyse manual query reformulation and compare them with the automatically produced reformulations. Preliminary results are reported in this paper.},
  keywords  = {Human Annotation, Information Retrieval, Query Reformulation, Serious Game},
  pubstate  = {published},
  tppubtype = {conference}
}
Adrian-Gabriel Chifu; Sébastien Fournier
20th International Conference on Knowledge Based and Intelligent Information and Engineering Systems, vol. 96, KES2016 Elsevier, 2016.
Abstract | Links | BibTeX | Tags: Lexical Chains, Story Segmentation, Transcriptions, Video Retrieval, Word Embedding
@conference{chifu2016segchainw2v,
  title     = {{SegChainW2V}: Towards a generic automatic video segmentation framework, based on lexical chains of audio transcriptions and word embeddings},
  author    = {Chifu, Adrian-Gabriel and Fournier, Sébastien},
  url       = {https://www.sciencedirect.com/science/article/pii/S1877050916319925},
  year      = {2016},
  date      = {2016-09-01},
  urldate   = {2016-01-01},
  booktitle = {20th International Conference on Knowledge Based and Intelligent Information and Engineering Systems},
  journal   = {Procedia Computer Science},
  volume    = {96},
  pages     = {1371--1380},
  publisher = {Elsevier},
  series    = {KES2016},
  abstract  = {With the advances in multimedia broadcasting through a rich variety of channels and with the vulgarization of video production, it becomes essential to be able to provide reliable means of retrieving information within videos, not only the videos themselves. Research in this area has been widely focused on the context of TV news broadcasts, for which the structure itself provides clues for story segmentation. The systematic employment of these clues would lead to thematically driven systems that would not be easily adaptable in the case of videos of other types. The systems are therefore dependent on the type of videos for which they have been designed. In this paper we aim at introducing SegChainW2V, a generic unsupervised framework for story segmentation, based on lexical chains from transcriptions and their vectorization. SegChainW2V takes into account the topic changes by perceiving the fluctuations of the most frequent terms throughout the video, as well as their semantics through the word embedding vectorization.},
  keywords  = {Lexical Chains, Story Segmentation, Transcriptions, Video Retrieval, Word Embedding},
  pubstate  = {published},
  tppubtype = {conference}
}
Adrian-Gabriel Chifu; Sébastien Fournier
Proceedings of the 6th international conference on web intelligence, mining and semantics, WIMS2016 2016.
Abstract | Links | BibTeX | Tags: Lexical Chains, Story Segmentation, Transcriptions, Video Retrieval
@conference{chifu2016segchain,
  title     = {{SegChain}: Towards a generic automatic video segmentation framework, based on lexical chains of audio transcriptions},
  author    = {Chifu, Adrian-Gabriel and Fournier, Sébastien},
  url       = {https://hal.archives-ouvertes.fr/hal-03018087/document},
  year      = {2016},
  date      = {2016-06-01},
  urldate   = {2016-01-01},
  booktitle = {Proceedings of the 6th international conference on web intelligence, mining and semantics},
  pages     = {1--8},
  series    = {WIMS2016},
  abstract  = {With the advances in multimedia broadcasting through a rich variety of channels and with the vulgarization of video production, it becomes essential to be able to provide reliable means of retrieving information within videos, not only the videos themselves. Research in this area has been widely focused on the context of TV news broadcasts, for which the structure itself provides clues for story segmentation. The systematic employment of these clues would lead to thematically driven systems that would not be easily adaptable in the case of videos of other types. The systems are therefore dependent on the type of videos for which they have been designed. In this paper we aim at introducing SegChain, a generic unsupervised framework for story segmentation, based on lexical chains from transcriptions. SegChain takes into account the topic changes by perceiving the fluctuations of the most frequent terms throughout the video.},
  keywords  = {Lexical Chains, Story Segmentation, Transcriptions, Video Retrieval},
  pubstate  = {published},
  tppubtype = {conference}
}
Adrian Chifu; Serge Molina; Josiane Mothe
MyBestQuery : un jeu sérieux pour apprendre des utilisateurs Conference
Conférence francophone en Recherche d'Information et Applications (CORIA 2016), Toulouse, 2016.
Abstract | Links | BibTeX | Tags: Crowdsourcing, Query Annotation, Search Engine, Serious Game, User Study
@conference{ChifuCORIA2016,
  title     = {{MyBestQuery} : un jeu sérieux pour apprendre des utilisateurs},
  author    = {Chifu, Adrian and Molina, Serge and Mothe, Josiane},
  url       = {https://hal.archives-ouvertes.fr/hal-01534812/document},
  year      = {2016},
  date      = {2016-03-01},
  urldate   = {2016-03-01},
  booktitle = {Conférence francophone en Recherche d'Information et Applications (CORIA 2016)},
  address   = {Toulouse, France},
  abstract  = {Résumé :
MyBestQuery est un jeu sérieux qui collecte des éléments sur les requêtes soumises à un moteur de recherche: (i) la prédiction de la difficulté de la requête par le joueur (ii) des raisons possibles expliquant cette difficulté (iii) des propositions de reformulation.
Abstract:
MyBestQuery is a serious game designed to collect items from queries submitted to a search engine: (i) the query difficulty prediction (ii) the possible reasons for this difficulty (iii) other query formulations.},
  keywords  = {Crowdsourcing, Query Annotation, Search Engine, Serious Game, User Study},
  pubstate  = {published},
  tppubtype = {conference}
}
MyBestQuery est un jeu sérieux qui collecte des éléments sur les requêtes soumises à un moteur de recherche: (i) la prédiction de la difficulté de la requête par le joueur (ii) des raisons possibles expliquant cette difficulté (iii) des propositions de reformulation.
Abstract:
MyBestQuery is a serious game designed to collect items from queries submitted to a search engine: (i) the query difficulty prediction (ii) the possible reasons for this difficulty (iii) other query formulations.
2015
Journal Articles
Julie Ayter; Adrian Chifu; Sébastien Déjean; Cecile Desclaux; Josiane Mothe
Statistical analysis to establish the importance of information retrieval parameters Journal Article
In: Journal of Universal Computer Science, vol. 21, no. 13, pp. 1767–1789, 2015.
Abstract | Links | BibTeX | Tags: Information Retrieval, IR System Parameter, Query Clustering, Query Difficulty, Random Forest
@article{ayter2015statistical,
  title    = {Statistical analysis to establish the importance of information retrieval parameters},
  author   = {Ayter, Julie and Chifu, Adrian and Déjean, Sébastien and Desclaux, Cecile and Mothe, Josiane},
  url      = {https://hal.archives-ouvertes.fr/hal-01592043/document},
  year     = {2015},
  date     = {2015-12-01},
  urldate  = {2015-12-01},
  journal  = {Journal of Universal Computer Science},
  volume   = {21},
  number   = {13},
  pages    = {1767--1789},
  abstract = {Search engines are based on models to index documents, match queries and documents and rank documents. Research in Information Retrieval (IR) aims at defining these models and their parameters in order to optimize the results. Using benchmark collections, it has been shown that there is not a best system configuration that works for any query, but rather that performance varies from one query to another. It would be interesting if a meta-system could decide which system configuration should process a new query by learning from the context of previous queries. This paper reports a deep analysis considering more than 80,000 search engine configurations applied to 100 queries and the corresponding performance. The goal of the analysis is to identify which configuration responds best to a certain type of query. We considered two approaches to define query types: one is post-evaluation, based on query clustering according to the performance measured with Average Precision, while the second approach is pre-evaluation, using query features (including query difficulty predictors) to cluster queries. Globally, we identified two parameters that should be optimized: retrieving model and TrecQueryTags process. One could expect such results as these two parameters are major components of IR process. However our work results in two main conclusions: 1/ based on post-evaluation approach, we found that retrieving model is the most influential parameter for easy queries while TrecQueryTags process is for hard queries; 2/ for pre-evaluation, current query features do not allow to cluster queries to identify differences in the influential parameters.},
  keywords = {Information Retrieval, IR System Parameter, Query Clustering, Query Difficulty, Random Forest},
  pubstate = {published},
  tppubtype = {article}
}
Adrian-Gabriel Chifu; Florentina Hristea; Josiane Mothe; Marius Popescu
Word sense discrimination in information retrieval: A spectral clustering-based approach Journal Article
In: Information Processing & Management, vol. 51, no. 2, pp. 16–31, 2015.
Abstract | Links | BibTeX | Tags: High Precision, Information Retrieval, Spectral Clustering, Word Sense Disambiguation, Word Sense Discrimination
@article{chifu2015word,
  title     = {Word sense discrimination in information retrieval: A spectral clustering-based approach},
  author    = {Chifu, Adrian-Gabriel and Hristea, Florentina and Mothe, Josiane and Popescu, Marius},
  url       = {https://hal.archives-ouvertes.fr/hal-01153775/document},
  year      = {2015},
  date      = {2015-03-01},
  urldate   = {2015-01-01},
  journal   = {Information Processing \& Management},
  volume    = {51},
  number    = {2},
  pages     = {16--31},
  publisher = {Elsevier},
  abstract  = {Word sense ambiguity has been identified as a cause of poor precision in information retrieval (IR) systems. Word sense disambiguation and discrimination methods have been defined to help systems choose which documents should be retrieved in relation to an ambiguous query. However, the only approaches that show a genuine benefit for word sense discrimination or disambiguation in IR are generally supervised ones. In this paper we propose a new unsupervised method that uses word sense discrimination in IR. The method we develop is based on spectral clustering and reorders an initially retrieved document list by boosting documents that are semantically similar to the target query. For several TREC ad hoc collections we show that our method is useful in the case of queries which contain ambiguous terms. We are interested in improving the level of precision after 5, 10 and 30 retrieved documents (P@5, P@10, P@30) respectively. We show that precision can be improved by 8% above current state-of-the-art baselines. We also focus on poor performing queries.},
  keywords  = {High Precision, Information Retrieval, Spectral Clustering, Word Sense Disambiguation, Word Sense Discrimination},
  pubstate  = {published},
  tppubtype = {article}
}
Conferences
Radu Tudor Ionescu; Adrian-Gabriel Chifu; Josiane Mothe
International Symposium on String Processing and Information Retrieval, SPIRE2015 Springer 2015.
Abstract | Links | BibTeX | Tags: Document Topic Distribution, Information Retrieval, Kurtosis, LDA, Ranking Retrieval Systems, Skewness, Topic Modeling
@conference{ionescu2015deshato,
  title        = {{DeShaTo}: Describing the Shape of Cumulative Topic Distributions to Rank Retrieval Systems without Relevance Judgments},
  author       = {Ionescu, Radu Tudor and Chifu, Adrian-Gabriel and Mothe, Josiane},
  url          = {https://oatao.univ-toulouse.fr/15354/1/ionescu_15354.pdf},
  year         = {2015},
  date         = {2015-09-01},
  urldate      = {2015-01-01},
  booktitle    = {International Symposium on String Processing and Information Retrieval},
  pages        = {75--82},
  organization = {Springer},
  series       = {SPIRE2015},
  abstract     = {This paper investigates an approach for estimating the effectiveness of any IR system. The approach is based on the idea that a set of documents retrieved for a specific query is highly relevant if there are only a small number of predominant topics in the retrieved documents. The proposed approach is to determine the topic probability distribution of each document offline, using Latent Dirichlet Allocation. Then, for a retrieved set of documents, a set of probability distribution shape descriptors, namely the skewness and the kurtosis, are used to compute a score based on the shape of the cumulative topic distribution of the respective set of documents. The proposed model is termed DeShaTo, which is short for Describing the Shape of cumulative Topic distributions. In this work, DeShaTo is used to rank retrieval systems without relevance judgments. In most cases, the empirical results are better than the state of the art approach. Compared to other approaches, DeShaTo works independently for each system. Therefore, it remains reliable even when there are less systems to be ranked by relevance.},
  keywords     = {Document Topic Distribution, Information Retrieval, Kurtosis, LDA, Ranking Retrieval Systems, Skewness, Topic Modeling},
  pubstate     = {published},
  tppubtype    = {conference}
}
Adrian Chifu; Léa Laporte; Josiane Mothe
La prédiction efficace de la difficulté des requêtes : une tâche impossible ? Conference
Conférence francophone en Recherche d'Information et Applications (CORIA 2015), Paris, 2015.
Abstract | Links | BibTeX | Tags: Data Mining, Evaluation, Information Retrieval, Query Difficulty Prediction
@conference{ChifuCORIA2015,
  title     = {La prédiction efficace de la difficulté des requêtes : une tâche impossible ?},
  author    = {Chifu, Adrian and Laporte, Léa and Mothe, Josiane},
  url       = {https://oatao.univ-toulouse.fr/15263/1/chifu_15263.pdf},
  year      = {2015},
  date      = {2015-03-18},
  urldate   = {2015-03-18},
  booktitle = {Conférence francophone en Recherche d'Information et Applications (CORIA 2015)},
  address   = {Paris, France},
  abstract  = {Résumé :
Les moteurs de recherche d’information (RI) retrouvent des réponses quelle que soit la requête, mais certaines requêtes sont difficiles (le système n’obtient pas de bonne performance en termes de mesure de RI). Pour les requêtes difficiles, des traitements ad-hoc doivent être appliqués. Prédire qu’une requête est difficile est donc crucial et différents prédicteurs ont été proposés. Dans cet article nous étudions la variété de l’information captée par les prédicteurs existants et donc leur non redondance. Par ailleurs, nous montrons que les corrélations entre les prédicteurs et les performance des systèmes donnent peu d’espoir sur la capacité de ces prédicteurs à être réellement efficaces. Enfin, nous étudions la capacité des prédicteurs à prédire les classes de difficulté des requêtes en nous appuyant sur une variété de méthodes exploratoires et d’apprentissage. Nous montrons que malgré les (faibles) corrélations observées avec les mesures de performance, les prédicteurs actuels conduisent à des performances de prédiction variables et sont donc difficilement utilisables dans une application concrète de RI.
Abstract:
Search engines found answers whatever the user query is, but some queries are more difficult than others for the system. For difficult queries, adhoc treatments must be applied. Predicting query difficulty is crucial and different predictors have been proposed. In this paper, we revisit these predictors. First we check the non statistical redundancy of predictors. Then, we show that the correlation between the values of predictors and system performance gives little hope on the ability of these predictors to be effective. Finally, we study the ability of predictors to predict the classes of difficulty by relying on a variety of exploratory and learning methods. We show that despite the (low) correlation with performance measures, current predictors are not robust enough to be used in practical IR applications.},
  keywords  = {Data Mining, Evaluation, Information Retrieval, Query Difficulty Prediction},
  pubstate  = {published},
  tppubtype = {conference}
}
Les moteurs de recherche d’information (RI) retrouvent des réponses quelle que soit la requête, mais certaines requêtes sont difficiles (le système n’obtient pas de bonne performance en termes de mesure de RI). Pour les requêtes difficiles, des traitements ad-hoc doivent être appliqués. Prédire qu’une requête est difficile est donc crucial et différents prédicteurs ont été proposés. Dans cet article nous étudions la variété de l’information captée par les prédicteurs existants et donc leur non redondance. Par ailleurs, nous montrons que les corrélations entre les prédicteurs et les performance des systèmes donnent peu d’espoir sur la capacité de ces prédicteurs à être réellement efficaces. Enfin, nous étudions la capacité des prédicteurs à prédire les classes de difficulté des requêtes en nous appuyant sur une variété de méthodes exploratoires et d’apprentissage. Nous montrons que malgré les (faibles) corrélations observées avec les mesures de performance, les prédicteurs actuels conduisent à des performances de prédiction variables et sont donc difficilement utilisables dans une application concrète de RI.
Abstract:
Search engines found answers whatever the user query is, but some queries are more difficult than others for the system. For difficult queries, adhoc treatments must be applied. Predicting query difficulty is crucial and different predictors have been proposed. In this paper, we revisit these predictors. First we check the non statistical redundancy of predictors. Then, we show that the correlation between the values of predictors and system performance gives little hope on the ability of these predictors to be effective. Finally, we study the ability of predictors to predict the classes of difficulty by relying on a variety of exploratory and learning methods. We show that despite the (low) correlation with performance measures, current predictors are not robust enough to be used in practical IR applications.
2014
Conferences
Julie Ayter; Cecile Desclaux; Adrian Chifu; Josiane Mothe; Sébastien Déjean
Performance Analysis of Information Retrieval Systems Conference
Spanish Conference on Information Retrieval (CERI2014), Coruna, 2014.
Abstract | Links | BibTeX | Tags: Adaptive Information Retrieval, Classification, Information Retrieval, Optimization, Query Difficulty, Random Forest
@conference{ayter2014performance,
  title     = {Performance Analysis of Information Retrieval Systems},
  author    = {Ayter, Julie and Desclaux, Cecile and Chifu, Adrian and Mothe, Josiane and Déjean, Sébastien},
  url       = {https://hal.archives-ouvertes.fr/hal-01119086/document},
  year      = {2014},
  date      = {2014-06-01},
  urldate   = {2014-06-01},
  booktitle = {Spanish Conference on Information Retrieval (CERI2014)},
  address   = {Coruna, Spain},
  abstract  = {It has been shown that there is not a best information retrieval system configuration which would work for any query, but rather that performance can vary from one query to another. It would be interesting if a meta-system could decide which system should process a new query by learning from the context of previously submitted queries. This paper reports a deep analysis considering more than 80,000 search engine configurations applied to 100 queries and the corresponding performance. The goal of the analysis is to identify which search engine configuration responds best to a certain type of query. We considered two approaches to define query types: one is based on query clustering according to the query performance (their difficulty), while the other approach uses various query features (including query difficulty predictors) to cluster queries. We identified two parameters that should be optimized first. An important outcome is that we could not obtain strong conclusive results; considering the large number of systems and methods we used, this result could lead to the conclusion that current query features does not fit the optimizing problem.},
  keywords  = {Adaptive Information Retrieval, Classification, Information Retrieval, Optimization, Query Difficulty, Random Forest},
  pubstate  = {published},
  tppubtype = {conference}
}
Adrian-Gabriel Chifu; Josiane Mothe
Expansion Sélective de Requêtes par Apprentissage Conference
Conférence francophone en Recherche d'Information et Applications, CORIA2014 LORIA, Nancy, France, 2014.
Abstract | Links | BibTeX | Tags: Difficulty Predictors, Machine Learning, Query Expansion, Selective Information Retrieval
@conference{chifu2014expansion,
  title     = {Expansion Sélective de Requêtes par Apprentissage},
  author    = {Chifu, Adrian-Gabriel and Mothe, Josiane},
  url       = {https://oatao.univ-toulouse.fr/12934/1/Chifu_12934.pdf},
  year      = {2014},
  date      = {2014-03-19},
  urldate   = {2014-03-19},
  booktitle = {Conférence francophone en Recherche d'Information et Applications},
  publisher = {LORIA},
  address   = {Nancy, France},
  series    = {CORIA2014},
  abstract  = {Query expansion (QE) improves the retrieval quality in average, even though it can dramatically decrease performance for certain queries. This observation drives the trend to suggest selective approaches that aim at choosing the best function to apply for each query. Most of selective approaches use a learning process on past query features and results. This paper presents a new selective QE method that relies on query difficulty predictors. The method combines statistically and linguistically based predictors. The QE method is learned by a SVM. We demonstrate the efficiency of the proposed method on a number of standard TREC benchmarks. The supervised learning models have performed the query classification with more than 90% accuracy on the test collection. Our approach improves MAP by more than 11%, compared to the non selective methods.},
  keywords  = {Difficulty Predictors, Machine Learning, Query Expansion, Selective Information Retrieval},
  pubstate  = {published},
  tppubtype = {conference}
}