class Mechanize::Page
This class encapsulates an HTML page. If Mechanize
finds a content type of ‘text/html’, this class will be instantiated and returned.
Example:
require 'mechanize' agent = Mechanize.new agent.get('http://google.com/').class # => Mechanize::Page
Constants
- DEFAULT_RESPONSE
Attributes
Possible encodings for this page based on HTTP
headers and meta elements
Public Class Methods
# File lib/mechanize/page.rb, line 576
# Pulls the value of the charset parameter out of a Content-Type style
# string such as "text/html; charset=UTF-8".
#
# Returns the captured charset, or nil when the pattern does not match or
# the captured value is exactly 'none'.
def charset content_type
  found = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1]
  found == 'none' ? nil : found
end
Retrieves all charsets from meta
tags in body
# File lib/mechanize/page.rb, line 598
# Collects the charsets declared by the <meta ...> tags found in +body+.
#
# Two forms are recognised for each tag:
# * a quoted charset attribute, whose value is returned directly;
# * an http-equiv content-type tag, whose content attribute is handed to
#   charset to extract the charset parameter.
#
# Returns an Array of charset strings in document order; tags that yield
# nothing are dropped.
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    # The value is matched lazily so the capture stops at the first closing
    # quote instead of running on into later attributes of the same tag.
    if meta =~ /charset\s*=\s*(["'])?\s*(.+?)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      meta =~ /content\s*=\s*(["'])?(.*?)\1/i
      # $2 is nil when the tag has no matching content attribute.
      charset $2 if $2
    end
  end.compact
end
Retrieves the last content-type
set by a meta
tag in body
# File lib/mechanize/page.rb, line 616
# Finds the last <meta ...> tag in +body+ that carries an http-equiv
# content-type attribute and returns the value of its content attribute.
#
# Returns nil when no such tag exists, or when that tag has no matching
# content attribute.
def self.meta_content_type body
  # Walk the tags from the end so the last declaration wins.
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    # Whitespace around "=" is tolerated, matching meta_charset.
    meta =~ /content\s*=\s*(["'])?(.*?)\1/i
    return $2
  end

  nil
end
Calls superclass method Mechanize::File::new
# File lib/mechanize/page.rb, line 28
# Builds a page and gathers the candidate encodings for its body.
#
# @encodings starts as [nil] and is appended to in this order: the charset
# detected from +body+, the charsets from the response headers, the
# charsets from meta tags in +body+, and finally +mech+'s default encoding
# when it has one. A content type declared by a meta tag is remembered in
# @meta_content_type.
#
# Raises RuntimeError ('no') when +mech+ is given but is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response = DEFAULT_RESPONSE unless response

  @meta_content_type = @encoding = nil
  @encodings = [nil]

  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  @encodings.push(Mechanize::Util.detect_charset(body)) if body
  @encodings.concat(self.class.response_header_charset(response))

  if body
    @encodings.concat(self.class.meta_charset(body))

    declared_type = self.class.meta_content_type(body)
    @meta_content_type = declared_type if declared_type
  end

  @encodings.push(mech.default_encoding) if mech && mech.default_encoding

  super(uri, response, body, code)
end
# File lib/mechanize/page.rb, line 585
# Gathers the charsets announced by the content-type headers of +response+.
#
# +response+ only needs to respond to each, yielding header name and value.
# Headers whose name is not exactly 'content-type', or whose value does not
# mention "charset", are ignored. Returns an Array of whatever charset
# extracts from each remaining value.
def self.response_header_charset response
  found = []

  response.each do |name, content|
    found.push(charset(content)) if name == 'content-type' && content =~ /charset/i
  end

  found
end
Public Instance Methods
Shorthand for parser.at
.
See also Nokogiri::XML::Node#at for details.
# File lib/mechanize/page.rb, line 216
Shorthand for parser.at_css
.
See also Nokogiri::XML::Node#at_css for details.
# File lib/mechanize/page.rb, line 223
Shorthand for parser.at_xpath
.
See also Nokogiri::XML::Node#at_xpath for details.
# File lib/mechanize/page.rb, line 236 def_delegators :parser, :search, :css, :xpath, :at, :at_css, :at_xpath
Find a single base tag matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “base tag(s)”.
Example:
page.base_with(href: /foo/).click
# File lib/mechanize/page.rb, line 343
Same as base_with
but raises an ElementNotFoundError if no base tag matches criteria
# File lib/mechanize/page.rb, line 356
Return a list of all base tags
# File lib/mechanize/page.rb, line 527
# Wraps every element returned by search('base') in a Base object.
# The list is built on first use and cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |element|
    Base.new(element, @mech, self)
  end
end
Find all base tags matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “base tag(s)”.
Example:
page.bases_with(href: /foo/).each do |base| puts base.href end
# File lib/mechanize/page.rb, line 378 elements_with :base
Return the canonical URI for the page if there is a link tag with rel=“canonical” and an href attribute.
# File lib/mechanize/page.rb, line 179
# Looks up the first link element that has rel="canonical" and an href
# attribute, and returns that href as a URI.
#
# Returns nil when no such element is found. If the href cannot be parsed
# as a URI, it is passed through Mechanize::Util.uri_escape and parsed again.
def canonical_uri
  node = at('link[@rel="canonical"][@href]')

  if node
    target = node['href']
    URI(target)
  end
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(target))
end
Get the content type
# File lib/mechanize/page.rb, line 190
# Returns the content type stored in @meta_content_type when one is set,
# otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
Shorthand for parser.css
.
See also Nokogiri::XML::Node#css for details.
# File lib/mechanize/page.rb, line 202
# File lib/mechanize/page.rb, line 71 def detected_encoding Mechanize::Util.detect_charset(body) end
# File lib/mechanize/page.rb, line 91 def encoding parser.encoding rescue NoMethodError nil end
# File lib/mechanize/page.rb, line 75
# Forces the encoding used the next time the document is parsed.
#
# reset discards the cached parser together with everything derived from
# it, so the document is re-parsed lazily with the new encoding on the next
# call to parser. No separate comparison against the old parser's encoding
# is needed: after reset there is no old parser left to compare with.
#
# Returns +encoding+.
def encoding=(encoding)
  reset

  @encoding = encoding
end
Return whether the parser result has errors related to encoding. A false result only indicates that the parser reported no encoding errors, not that the encoding is valid.
# File lib/mechanize/page.rb, line 99
# Reports whether +parser+ (this page's own parser when omitted) recorded
# an error whose message looks encoding related.
#
# Returns false when the parser has no errors at all, otherwise true if any
# error message, after scrub, matches one of the known encoding phrases.
def encoding_error?(parser=nil)
  parser ||= self.parser

  reported = parser.errors
  return false if reported.empty?

  reported.any? do |problem|
    problem.message.scrub =~ /(indicate\ encoding)| (Invalid\ bytes)| (Invalid\ char)| (input\ conversion\ failed)/x
  end
end
Find a single form matching criteria
. See forms_with
for details of criteria
.
Examples:
page.form_with(action: '/post/login.php') do |f| ... end
# File lib/mechanize/page.rb, line 242
Same as form_with
but raises an ElementNotFoundError if no form matches criteria
# File lib/mechanize/page.rb, line 257
Return a list of all form tags
# File lib/mechanize/page.rb, line 506
# Wraps every element returned by search('form') in a Mechanize::Form.
# A form without an action gets this page's URI (as a string) as its
# action. The list is built on first use and cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |node|
    built = Mechanize::Form.new(node, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end
Find all forms matching criteria. If a string is given, it is taken as a name attribute value. If a hash is given, forms are narrowed by the key-value pairs as follows.
:id, :dom_id: selects forms with a dom_id value that matches this value.
:class, :dom_class: selects forms with a dom_class value that matches this value. Note that class attribute values are compared literally as string, so forms_with
(class: “a”) does not match a form with class=“a b”. Use forms_with
(css: “form.a”) instead.
:search: only selects forms matching this selector expression.
:xpath: only selects forms matching this XPath expression.
:css: only selects forms matching this CSS selector expression.
:action, :method, etc.: narrows forms by a given attribute value using the === operator.
Example:
page.forms_with(css: '#content table.login_box form', method: /\APOST\z/i, ).each do |f| ... end
# File lib/mechanize/page.rb, line 301 elements_with :form
Find a single frame tag matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “frame tag(s)”.
Example:
page.frame_with(src: /foo/).click
# File lib/mechanize/page.rb, line 381
Same as frame_with
but raises an ElementNotFoundError if no frame tag matches criteria
# File lib/mechanize/page.rb, line 394
Return a list of all frame tags
# File lib/mechanize/page.rb, line 534
# Wraps every element returned by search('frame') in a Frame object.
# The list is built on first use and cached in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |element|
    Frame.new(element, @mech, self)
  end
end
Find all frame tags matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “frame tag(s)”.
Example:
page.frames_with(src: /foo/).each do |frame| p frame.src end
# File lib/mechanize/page.rb, line 416 elements_with :frame
Find a single iframe tag matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “iframe tag(s)”.
Example:
page.iframe_with(src: /foo/).click
# File lib/mechanize/page.rb, line 419
Same as iframe_with
but raises an ElementNotFoundError if no iframe tag matches criteria
# File lib/mechanize/page.rb, line 432
Return a list of all iframe tags
# File lib/mechanize/page.rb, line 541
# Wraps every element returned by search('iframe') in a Frame object.
# The list is built on first use and cached in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end
Find all iframe tags matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “iframe tag(s)”.
Example:
page.iframes_with(src: /foo/).each do |iframe| p iframe.src end
# File lib/mechanize/page.rb, line 454 elements_with :iframe
# File lib/mechanize/page.rb, line 553
# Returns the url of every image on the page with duplicates removed.
# The list is built on first use and cached in @image_urls.
def image_urls
  @image_urls ||= images.map { |image| image.url }.uniq
end
Find a single image matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “image(s)”.
Example:
page.image_with(alt: /main/).fetch.save
# File lib/mechanize/page.rb, line 457
Same as image_with
but raises an ElementNotFoundError if no image matches criteria
# File lib/mechanize/page.rb, line 470
Return a list of all img tags
# File lib/mechanize/page.rb, line 548
# Wraps every element returned by search('img') in an Image object.
# The list is built on first use and cached in @images.
def images
  return @images if @images

  @images = search('img').map do |element|
    Image.new(element, self)
  end
end
Find all images matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “image(s)”.
Example:
page.images_with(src: /jpg\Z/).each do |img| img.fetch.save end
# File lib/mechanize/page.rb, line 492 elements_with :image
Return a list of all label tags
# File lib/mechanize/page.rb, line 559
# Wraps every element returned by search('label') in a Label object.
# The list is built on first use and cached in @labels.
def labels
  return @labels if @labels

  @labels = search('label').map do |element|
    Label.new(element, self)
  end
end
# File lib/mechanize/page.rb, line 564
# Returns a Hash mapping the 'for' attribute of each label node to its
# label. Labels whose #for is falsy are left out. The Hash is built on
# first use and cached in @labels_hash.
def labels_hash
  @labels_hash ||= labels.each_with_object({}) do |label, index|
    index[label.node['for']] = label if label.for
  end
end
Find a single link matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “link(s)”.
Example:
page.link_with(href: /foo/).click
# File lib/mechanize/page.rb, line 304
Same as link_with
but raises an ElementNotFoundError if no link matches criteria
# File lib/mechanize/page.rb, line 317
Return a list of all links, built from the a and area tags
# File lib/mechanize/page.rb, line 496
# Wraps every element returned by search('a') and search('area') in a Link
# object: all a elements first, then all area elements. The list is built
# on first use and cached in @links.
def links
  return @links if @links

  per_tag = %w{ a area }.map do |tag|
    search(tag).map { |node| Link.new(node, @mech, self) }
  end

  @links = per_tag.flatten
end
Find all links matching criteria
. See forms_with
for details of criteria
, where for “form(s)” read “link(s)”.
Example:
page.links_with(href: /foo/).each do |link| puts link.href end
# File lib/mechanize/page.rb, line 340 elements_with :link
# File lib/mechanize/page.rb, line 67 def meta_charset self.class.meta_charset(body) end
Return a list of all meta refresh elements
# File lib/mechanize/page.rb, line 517
# Returns the MetaRefresh objects built from this page's meta elements.
#
# Meta elements are searched anywhere in the document when the agent's
# follow_meta_refresh is :anywhere, and only directly under head otherwise
# (including when the page has no agent). Nodes for which
# MetaRefresh.from_node returns nil are dropped. The list is built on first
# use and cached in @meta_refresh; the agent is consulted only at that time.
def meta_refresh
  @meta_refresh ||= begin
    # @mech may be nil: initialize accepts a page without an agent.
    anywhere = @mech && @mech.follow_meta_refresh == :anywhere
    query = anywhere ? 'meta' : 'head > meta'

    search(query).map { |node| MetaRefresh.from_node node, self }.compact
  end
end
# File lib/mechanize/page.rb, line 110 def parser return @parser if @parser return unless @body url = @uri && @uri.to_s if @encoding @parser = mech.html_parser.parse html_body, url, @encoding elsif mech.force_default_encoding @parser = mech.html_parser.parse html_body, url, @mech.default_encoding else @encodings.reverse_each do |encoding| @parser = mech.html_parser.parse html_body, url, encoding break unless encoding_error? @parser end end @parser end
# File lib/mechanize/page.rb, line 164
# Drops the cached parser and every cached value derived from it, so each
# is rebuilt on next access. This includes @images and @image_urls, which
# would otherwise keep pointing at nodes of the discarded document.
#
# Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
# File lib/mechanize/page.rb, line 63 def response_header_charset self.class.response_header_charset(response) end
Shorthand for parser.search
.
See Nokogiri::XML::Node#search for details.
# File lib/mechanize/page.rb, line 195
# File lib/mechanize/page.rb, line 55
# Returns the text of the first title element found under /html/head,
# /html, /head or the document root, or nil when there is no parsed
# document or the text is empty. A non-nil title is cached in @title.
def title
  return @title if @title

  document = parser
  return @title = nil unless document

  text = document.xpath('string(((/html/head | /html | /head | /)/title)[1])').to_s
  @title = text.empty? ? nil : text
end
Shorthand for parser.xpath
.
See also Nokogiri::XML::Node#xpath for details.
# File lib/mechanize/page.rb, line 209
Private Instance Methods
# File lib/mechanize/page.rb, line 630
# Returns the text handed to the HTML parser: '' when there is no @body,
# '<html></html>' when @body is empty, and @body itself otherwise.
def html_body
  return '' unless @body
  return '<html></html>' if @body.empty?

  @body
end