# robots.txt -- crawler access policy for this site.
#
# By default we allow robots to access all areas of the site that are
# accessible to anonymous users, except for search (which burns our CPU for
# no reason) and the restricted or internal sections listed below.
#
# NOTE: every directive must sit on its own line. A "#" starts a comment that
# runs to the end of the line, so if this file is ever collapsed onto a single
# line the whole policy is read as one comment and nothing is blocked.

User-agent: *
Disallow: /search
Disallow: /iro/secure/
Disallow: /industry/secure/
Disallow: /about/faculty-staff-resources/limitedsubs/
Disallow: /sponsored-programs/
Disallow: /research-protections/
Disallow: /osp-build/
Disallow: /orp-build/
Disallow: /ics/
Disallow: /iah/
Disallow: /front-page
Disallow: /documents/
Disallow: /documents
Disallow: /capabilities/centers/
Disallow: /faq/
Disallow: /faq
# Some bots, like Bing's and Yahoo's, obey a Crawl-delay parameter, which
# specifies a number of seconds to wait between hits. (Bing supports only
# whole-number values.) This is a tradeoff between CPU use and search result
# freshness. Kept directly under the rules above (no blank line) so that it
# stays part of the "User-agent: *" group for parsers that end a group at a
# blank line.
Crawl-delay: 1

# Googlebot-specific rules that exclude forms which are repeated for each
# piece of content in the site. The "*" and "$" wildcards are an extension to
# the original robots.txt convention; Googlebot honours them, but not every
# crawler does.
# http://www.google.com/support/webmasters/bin/answer.py?answer=40367&ctx=sibling
#
# NOTE(review): a crawler that follows RFC 9309 (Googlebot included) obeys
# only the most specific User-agent group that matches it. Googlebot therefore
# applies just the four rules below and NOT the "User-agent: *" rules above.
# If /search and the other paths should stay blocked for Googlebot too, repeat
# those Disallow lines in this group -- confirm the intent before changing.
User-agent: Googlebot
Disallow: /*sendto_form$
Disallow: /*folder_factories$
Disallow: /*?searchterm=*
Disallow: /*?path=*

# Penn State's Google Search Appliance comes at some servers so hard and fast
# that it burns 60% of their CPU. Limit what it spiders.
# NOTE(review): the same group-precedence caveat presumably applies here --
# verify whether the appliance should also skip the paths in the "*" group.
User-agent: PennStateSpider
Disallow: /*sendto_form$
Disallow: /*folder_factories$
Disallow: /*?searchterm=*
Disallow: /*?path=*