% Bibliography database for classic BibTeX.
% Text outside entries (such as these percent-prefixed lines) is ignored by
% BibTeX.  Accented letters are written as brace-wrapped LaTeX accent commands
% so the entries do not depend on the encoding of this file.

@inproceedings{ardo99:_online99,
  author    = {A. Ard{\"o} and T. Koch},
  title     = {Automatic classification applied to the full-text {Internet} documents in a robot-generated subject index},
  booktitle = {Online Information 99, Proceedings},
  pages     = {239--246},
  month     = dec,
  year      = 1999,
  isbn      = {1-900871-44-0},
  note      = {\htmladdnormallink{http://www.it.lth.se/anders/online99/}{http://www.it.lth.se/anders/online99/}},
}

@misc{tidy,
  title = {{HTML} {Tidy} {Library} {Project}},
  note  = {\htmladdnormallink{http://tidy.sourceforge.net/}{http://tidy.sourceforge.net/}},
}

@misc{oai,
  title = {{The Open Archives Initiative Protocol for Metadata Harvesting}},
  note  = {\htmladdnormallink{http://www.openarchives.org/OAI/openarchivesprotocol.html}{http://www.openarchives.org/OAI/openarchivesprotocol.html}},
}

@misc{hiitmpca,
  title = {Discrete {Components} {Analysis}},
  note  = {\htmladdnormallink{http://www.componentanalysis.org/}{http://www.componentanalysis.org/}},
}

@misc{robotexclusion,
  title = {Robots {Exclusion} {Protocol}},
  note  = {\htmladdnormallink{http://www.robotstxt.org/wc/exclusion.html}{http://www.robotstxt.org/wc/exclusion.html}},
}

@techreport{a32,
  author      = {S. Lundberg and A. Ard{\"o} and A. Br{\"u}mmer and T. Koch},
  title       = {The {European Web Index}: An {Internet} Search Service for the {European} Higher Education, Research and Development Communities},
  institution = {NetLab, Lund University Library},
  month       = aug,
  year        = 1996,
  note        = {Paper prepared to meet the requirements of Work Package 3 of EU Telematics for Research, project DESIRE. \htmladdnormallink{http://www.lub.lu.se/desire/radar/reports/D3.12}{http://www.lub.lu.se/desire/radar/reports/D3.12}},
}

% NOTE(review): the next entry sets both volume and number; the standard
% styles warn about that combination for inproceedings.  Both are kept so
% that no bibliographic data is lost.
@inproceedings{a37,
  author    = {A. Ard{\"o} and S. Lundberg},
  title     = {A regional distributed {WWW} search and indexing service - the {DESIRE} way},
  booktitle = {Proceedings of the Seventh International World Wide Web Conference},
  series    = {Computer Networks and ISDN Systems},
  volume    = 30,
  number    = {1--7},
  pages     = {173--183},
  month     = apr,
  year      = 1998,
  note      = {\htmladdnormallink{http://www.it.lth.se/anders/WWW7/}{http://www.it.lth.se/anders/WWW7/}},
}

@inproceedings{Buntine2005,
  author    = {W. Buntine},
  title     = {Open {Source} {Search}: {A} {Data} {Mining} {Platform}},
  booktitle = {SIGIR Forum},
  year      = 2005,
  note      = {To appear},
}

@inproceedings{Aberer:2004,
  author    = {K. Aberer and F. Klemm and M. Rajman and Jie Wu},
  title     = {An {Architecture} for {P2P} {Information} {Retrieval}},
  booktitle = {27th Annual International ACM SIGIR Conference (SIGIR 2004), Workshop on Peer-to-Peer Information Retrieval, Sheffield, UK},
  month     = jul,
  year      = 2004,
}

@misc{ALVIS,
  title = {{ALVIS} -- {Superpeer} {Semantic} {Search} {Engine}},
  note  = {http://www.alvis.info/},
}

@misc{alvisarch,
  title = {{ALVIS} architecture},
  note  = {http://www.alvis.info/alvis/architecture},
}

@misc{alvisxml,
  author = {Kimmo Valtonen and Antti Tuominen and Wray Buntine},
  title  = {The {ALVIS} {Document} {Architecture}},
  note   = {\linebreak http://project.alvis.info/alvis\_docs/ALVIS\_X8\_20050130\_HUT\_KV.pdf},
}

@misc{Knowlib,
  title = {Knowledge {Discovery} and {Digital} {Library} {Research} {Group}},
  note  = {http://www.it.lth.se/knowlib/},
}

@misc{Combine,
  author = {A. Ard{\"o}},
  title  = {Combine {Web} crawler},
  year   = 2005,
  note   = {Software package for general and focused Web-crawling. http://combine.it.lth.se/},
}

@article{chakrabarti99focused,
  author  = {Soumen Chakrabarti and Martin van den Berg and Byron Dom},
  title   = {Focused crawling: a new approach to topic-specific {Web} resource discovery},
  journal = {Computer Networks},
  volume  = {31},
  number  = {11--16},
  pages   = {1623--1640},
  year    = {1999},
}

@article{larsson92,
  author  = {Larson, R. R.},
  title   = {Experiments in automatic {Library} of {Congress} classification},
  journal = {JASIS},
  volume  = {43},
  number  = {2},
  pages   = {130--148},
  year    = {1992},
}

@article{Golub_Review,
  author  = {Koraljka Golub},
  title   = {Automated subject classification of textual {Web} documents},
  journal = {Journal of Documentation},
  volume  = {62},
  number  = {3},
  pages   = {350--371},
  year    = 2006,
}

% Chapter in an edited book (editor, publisher and ISBN are given), hence the
% incollection type.
@incollection{pant04crawling,
  author    = {Gautam Pant and Padmini Srinivasan and Filippo Menczer},
  title     = {Crawling the {Web}},
  editor    = {Mark Levene and Alexandra Poulovassilis},
  booktitle = {Web Dynamics - Adapting to Change in Content, Size, Topology and Use},
  publisher = {Springer},
  pages     = {153--178},
  year      = {2004},
  isbn      = {3-540-40676-X},
}

@inproceedings{ardo05:_ECDL,
  author    = {K. Golub and A. Ard{\"o}},
  title     = {Importance of {HTML} {Structural} {Elements} in {Automated} {Subject} {Classification}},
  editor    = {Andreas Rauber and Stavros Christodoulakis and A Min Tjoa},
  booktitle = {9th European Conference on Research and Advanced Technology for Digital Libraries - ECDL 2005},
  series    = {Lecture Notes in Computer Science},
  volume    = 3652,
  publisher = {Springer},
  pages     = {368--378},
  month     = sep,
  year      = 2005,
  isbn      = {3-540-28767-1},
  note      = {Manuscript at: \htmladdnormallink{http://www.it.lth.se/knowlib/publ/ECDL2005.pdf}{http://www.it.lth.se/knowlib/publ/ECDL2005.pdf}},
}

@misc{MeSH,
  title = {{Medical Subject Headings, MeSH}},
  note  = {\htmladdnormallink{http://www.nlm.nih.gov/mesh/meshhome.html}{http://www.nlm.nih.gov/mesh/meshhome.html}},
}

@misc{ei,
  title = {{Engineering Information Inc}},
  note  = {\htmladdnormallink{http://www.ei.org/}{http://www.ei.org/}},
}

@misc{heritrix,
  title = {{Heritrix}},
  note  = {\htmladdnormallink{http://crawler.archive.org/}{http://crawler.archive.org/}},
}

@misc{wire,
  title = {{WIRE}},
  note  = {\htmladdnormallink{http://www.cwr.cl/projects/WIRE/}{http://www.cwr.cl/projects/WIRE/}},
}

@misc{nalanda,
  title = {{Nalanda iVia Focused Crawler}},
  note  = {\htmladdnormallink{http://ivia.ucr.edu/projects/Nalanda/}{http://ivia.ucr.edu/projects/Nalanda/}},
}

@misc{CombineDoc,
  title = {{Detailed documentation for the Combine system}},
  note  = {\htmladdnormallink{http://combine.it.lth.se/documentation/}{http://combine.it.lth.se/documentation/}},
}

@inproceedings{ardo06:_ECDL,
  author    = {K. Golub and A. Ard{\"o} and D. Mladenic and M. Grobelnik},
  title     = {{Comparing and Combining Two Approaches to Automated Subject Classification of Text}},
  editor    = {Julio Gonzalo and Constantino Thanos and M. Felisa Verdejo and Rafael C. Carrasco},
  booktitle = {10th European Conference on Research and Advanced Technology for Digital Libraries - ECDL 2006},
  series    = {Lecture Notes in Computer Science},
  volume    = 4172,
  publisher = {Springer},
  pages     = {467--470},
  month     = sep,
  year      = 2006,
  isbn      = {3-540-44636-2},
}

@article{GolubChall,
  author  = {Koraljka Golub},
  title   = {Automated subject classification of textual {Web} pages, based on a controlled vocabulary: challenges and recommendations},
  journal = {New Review of Hypermedia and Multimedia},
  volume  = {12},
  number  = {1},
  pages   = {11--27},
  month   = jun,
  year    = {2006},
  note    = {Special issue on knowledge organization systems and services},
}

@inproceedings{GolubRole,
  author    = {Koraljka Golub},
  title     = {The Role of Different Thesauri Terms in Automated Subject Classification of Text},
  booktitle = {IEEE/WIC/ACM International Conference on Web Intelligence},
  month     = dec,
  year      = {2006},
}

% The 5 is the journal volume; it is stored in volume so styles do not warn
% about a number without a volume.
@article{Lewis_etal_04,
  author  = {D. D. Lewis and Y. Yang and T. Rose and F. Li},
  title   = {{RCV1}: A new benchmark collection for text categorization research},
  journal = {The Journal of Machine Learning Research},
  volume  = {5},
  pages   = {361--397},
  year    = {2004},
}

@article{Yang_99,
  author  = {Y. Yang},
  title   = {An evaluation of statistical approaches to text categorization},
  journal = {Journal of Information Retrieval},
  volume  = {1},
  number  = {1/2},
  pages   = {67--88},
  year    = {1999},
}
%Y. Yang, An evaluation of statistical approaches to text categorization, Journal of Information Retrieval, 1999, 1(1/2), pp. 67-88.

@book{Svenonius_2000,
  author    = {E. Svenonius},
  title     = {The intellectual foundations of information organization},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = 2000,
}
%E. Svenonius, The intellectual foundations of information organization. Cambridge, MA: MIT Press, 2000.

@inproceedings{Chen_Dumais_2000,
  author    = {H. Chen and S. T. Dumais},
  title     = {Bringing order to the web: automatically categorizing search results},
  booktitle = {Proc. of CHI-00, ACM International Conference on Human Factors in Computing Systems},
  pages     = {145--152},
  year      = {2000},
}
%H. Chen, and S. T.
% Dumais, Bringing order to the web: automatically categorizing search results, in Proc. of CHI-00, ACM International Conference on Human Factors in Computing Systems, 2000, pp. 145-152.

% The 42 is the journal volume (see the plain-text citation below the entry),
% so it is stored in volume rather than number.
@article{Bang_etal_06,
  author  = {S. L. Bang and J. D. Yang and H. J. Yang},
  title   = {Hierarchical document categorization with {k-NN} and concept-based thesauri},
  journal = {Information Processing and Management},
  volume  = {42},
  pages   = {387--406},
  year    = {2006},
}
%S. L. Bang, J. D. Yang, and H. J. Yang, Hierarchical document categorization with k-NN and concept-based thesauri, Information Processing and Management, 2006, 42, pp. 387-406.

@article{Garces_etal_06,
  author  = {P. J. Garc{\'e}s and J. A. Olivas and F. P. Romero},
  title   = {Concept-matching {IR} systems versus word-matching information retrieval systems: Considering fuzzy interrelations for indexing {Web} pages},
  journal = {JASIS\&T},
  volume  = {57},
  number  = {4},
  pages   = {564--576},
  year    = {2006},
}
%P. J. Garcés, J. A. Olivas, and F. P. Romero, Concept-matching IR systems versus word-matching information retrieval systems: Considering fuzzy interrelations for indexing Web pages, JASIS&T 2006, 57(4), pp. 564-576.

@inproceedings{Medelyan_06,
  author    = {O. Medelyan and I. Witten},
  title     = {Thesaurus based automatic keyphrase indexing},
  booktitle = {Proceedings of the Sixth ACM/IEEE Joint Conference on Digital Libraries, JCDL 06},
  pages     = {296--297},
  year      = {2006},
}
%O. Medelyan, and I. Witten, Thesaurus based automatic keyphrase indexing, in: Proc. of the JCDL 2006, pp. 296-297.

@misc{International_85,
  title = {{Documentation - Methods for examining documents, determining their subjects, and selecting index terms}},
  note  = {International Organization for Standardization, Standard 5963-1985},
}
%International Organization for Standardization. Documentation - Methods for examining documents, determining their subjects, and selecting index terms, ISO Standard 5963-1985.
% Books: the publisher's city goes in address and the edition in edition, so
% the bibliography style decides how they are printed.

@book{Lancaster_03,
  author    = {F. W. Lancaster},
  title     = {Indexing and abstracting in theory and practice},
  edition   = {Third},
  publisher = {Facet},
  address   = {London},
  year      = 2003,
}
%F. W. Lancaster, Indexing and abstracting in theory and practice. 3rd ed., London: Facet, 2003.

@book{Olson_01,
  author    = {H. A. Olson and J. J. Boll},
  title     = {Subject analysis in online catalogs},
  edition   = {Second},
  publisher = {Libraries Unlimited},
  address   = {Englewood, CO},
  year      = 2001,
}
%H. A. Olson, and J. J. Boll, Subject analysis in online catalogs. 2nd ed. Englewood, CO: Libraries Unlimited, 2001.

% The two link arguments differ on purpose: the first is typeset as text and
% needs escaped underscores, the second is the raw link target.
@misc{Hjorland06,
  author = {B. Hj{\o}rland},
  title  = {{Lifeboat for knowledge organization: indexing theory}},
  note   = {\htmladdnormallink{http://www.db.dk/bh/Lifeboat\_KO/CONCEPTS/indexing\_theory.htm}{http://www.db.dk/bh/Lifeboat_KO/CONCEPTS/indexing_theory.htm}},
}
%B. Hjørland, Lifeboat for knowledge organization: indexing theory. Available: http://www.db.dk/bh/Lifeboat_KO/CONCEPTS/indexing_theory.htm

@book{1Ingwersen_05,
  author    = {P. Ingwersen and K. J{\"a}rvelin},
  title     = {The turn: integration of information seeking and retrieval in context},
  publisher = {Springer},
  address   = {Dordrecht, The Netherlands},
  year      = 2005,
}
%P. Ingwersen, and K. Järvelin, The turn: integration of information seeking and retrieval in context. Dordrecht, The Netherlands: Springer, 2005.

@article{Sebastiani_02,
  author  = {F. Sebastiani},
  title   = {Machine learning in automated text categorization},
  journal = {ACM Computing Surveys},
  volume  = {34},
  number  = {1},
  pages   = {1--47},
  year    = {2002},
}
%F. Sebastiani, Machine learning in automated text categorization, ACM Computing Surveys, 2002, 34(1), pp. 1-47.

@misc{Compendex,
  title = {{Compendex database}},
  note  = {\htmladdnormallink{http://www.engineeringvillage2.org/}{http://www.engineeringvillage2.org/}},
}
%Compendex database. Available: http://www.engineeringvillage2.org/.

@mastersthesis{Rafael06,
  author = {Rafael Romero Trujilo},
  title  = {Simulation Tool to Study Focused Web Crawling Strategies},
  school = {Dept. of Information Technology, Lund University, P.O. Box 118, S-221~00 Lund, Sweden},
  month  = mar,
  year   = 2006,
  note   = {\htmladdnormallink{http://combine.it.lth.se/CrawlSim/CrawlSim.pdf}{http://combine.it.lth.se/CrawlSim/CrawlSim.pdf}},
}