Heydon1999@article{Heydon1999,
title = {{Mercator}: A scalable, extensible {Web} crawler},
author = {Heydon, Allan and Najork, Marc},
journal = {World Wide Web},
volume = {2},
number = {4},
year = {1999},
pages = {219--229},
www_pdf_url = {http://coitweb.uncc.edu/~sakella/courses/cloud09/papers/Mercator.pdf},
issn = {1573-1413},
www_tags = {selected},
url = {http://dx.doi.org/10.1023/A:1019213109274},
abstract = {This paper describes Mercator, a scalable, extensible Web crawler written
entirely in Java. Scalable Web crawlers are an important component of many Web
services, but their design is not well-documented in the literature. We enumerate
the major components of any scalable Web crawler, comment on alternatives and
tradeoffs in their design, and describe the particular components used in
Mercator. We also describe Mercator's support for extensibility and
customizability. Finally, we comment on Mercator's performance, which we have
found to be comparable to that of other crawlers for which performance numbers
have been published.},
doi = {10.1023/A:1019213109274},
www_important = {1},
www_section = {crawling},
}
|
ext4-shingled-disks@inproceedings{ext4-shingled-disks,
title = {Evolving {Ext4} for Shingled Disks},
author = {Abutalib Aghayev and Theodore Ts{\textquoteright}o and Garth Gibson and Peter
Desnoyers},
booktitle = {Proceedings of 15th USENIX Conference on File and Storage Technologies (FAST
17)},
year = {2017},
address = {Santa Clara, CA},
pages = {105--120},
publisher = {USENIX Association},
isbn = {978-1-931971-36-2},
www_section = {Hardware and Data Centers},
www_tags = {selected},
www_pdf_url = {http://www.pdl.cmu.edu/PDL-FTP/Storage/ext4-lazy.pdf},
}
|
guerilla-open-access@misc{guerilla-open-access,
  author       = {Aaron Swartz},
  title        = {Guerilla Open Access Manifesto},
  howpublished = {internet},
  month        = {July},
  year         = {2008},
  www_section  = {Radical Libraries},
  www_tags     = {selected},
  www_html_url = {https://archive.org/stream/GuerillaOpenAccessManifesto/Goamjuly2008_djvu.txt},
}
|
kahle-2020-vision@article{kahle-2020-vision,
  author       = {Brewster Kahle},
  title        = {Transforming Our Libraries from Analog to Digital: A 2020 Vision},
  journal      = {EDUCAUSE Review},
  month        = {March},
  year         = {2017},
  www_section  = {The Internet Archive},
  www_tags     = {selected},
  www_html_url = {http://er.educause.edu/articles/2017/3/transforming-our-libraries-from-analog-to-digital-a-2020-vision},
  www_pdf_url  = {http://er.educause.edu/~/media/files/articles/2017/3/erm1722.pdf},
}
|
licklider1965libraries@book{licklider1965libraries,
title = {Libraries of the Future},
author = {Licklider, J. C. R.},
year = {1965},
publisher = {M.I.T. Press},
www_tags = {selected},
www_section = {misc},
www_pdf_url = {http://worrydream.com/refs/Licklider%20-%20Libraries%20of%20the%20Future.pdf},
www_html_url = {https://openlibrary.org/books/OL5942946M/Libraries_of_the_future},
}
|
najork-heydon-highperf@techreport{najork-heydon-highperf,
title = {High-Performance Web Crawling},
author = {Marc Najork and Allan Heydon},
type = {SRC Research Report},
number = {173},
institution = {Compaq Systems Research Center},
year = {2001},
www_section = {crawling},
www_html_url = {https://web-beta.archive.org/web/20061018044811/http://gatekeeper.research.compaq.com/pub/DEC/SRC/research-reports/abstracts/src-rr-173.html},
www_pdf_url = {http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-173.pdf},
www_tags = {selected},
}
|
newyorker-cobweb@article{newyorker-cobweb,
title = {The Cobweb: Can the {Internet} be archived?},
author = {Jill Lepore},
journal = {The New Yorker},
year = {2015},
month = {January},
www_tags = {selected},
www_section = {The Internet Archive},
www_html_url = {http://www.newyorker.com/magazine/2015/01/26/cobweb},
}
|