Commit cfc0f816 authored by Wickramasinghe R.J.P

firstvet-crawler-added

parent 4bf76a7e
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class FirstvetItem(scrapy.Item):
    """Container for items scraped by the firstvet project.

    No fields are declared yet. Define the fields for your item here like:

        name = scrapy.Field()
    """
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class FirstvetSpiderMiddleware:
    """Pass-through spider middleware for the firstvet project.

    Not all methods need to be defined. If a method is not defined,
    Scrapy acts as if the spider middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``.

        Scrapy calls this factory to build the middleware instance
        (not the spiders themselves).
        """
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Handle a response on its way into the spider.

        Called for each response that goes through the spider middleware
        and into the spider. Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Forward everything the spider produced for ``response``.

        Called with the results returned from the spider after it has
        processed the response. Must return an iterable of Request or
        item objects; this implementation yields them unchanged.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """React to an exception raised by a spider or process_spider_input().

        Should return either None or an iterable of Request or item
        objects. Returning None leaves the exception unhandled here.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Forward the spider's start requests unchanged.

        Works similarly to process_spider_output(), except that it has
        no response associated. Must return only requests (not items).
        """
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        # Pass the name as a logging argument so formatting is deferred
        # until the record is actually emitted.
        spider.logger.info('Spider opened: %s', spider.name)
class FirstvetDownloaderMiddleware:
    """Pass-through downloader middleware for the firstvet project.

    Not all methods need to be defined. If a method is not defined,
    Scrapy acts as if the downloader middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``.

        Scrapy calls this factory to build the middleware instance
        (not the spiders themselves).
        """
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_request(self, request, spider):
        """Handle a request on its way to the downloader.

        Called for each request that goes through the downloader
        middleware. Must either:
        - return None: continue processing this request
        - or return a Response object
        - or return a Request object
        - or raise IgnoreRequest: process_exception() methods of
          installed downloader middleware will be called
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded response unchanged.

        Called with the response returned from the downloader.
        Must either:
        - return a Response object
        - return a Request object
        - or raise IgnoreRequest
        """
        return response

    def process_exception(self, request, exception, spider):
        """React to an exception from a download handler or process_request().

        Must either:
        - return None: continue processing this exception
        - return a Response object: stops process_exception() chain
        - return a Request object: stops process_exception() chain
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        # Pass the name as a logging argument so formatting is deferred
        # until the record is actually emitted.
        spider.logger.info('Spider opened: %s', spider.name)
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
class FirstvetPipeline:
    """Item pipeline that passes every item through unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as received, without modifying it.

        Args:
            item: The scraped item handed to the pipeline.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object.
        """
        return item
# Scrapy settings for firstvet project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# Name of the bot implemented by this project (see the BOT_NAME setting docs).
BOT_NAME = 'firstvet'
# Modules in which Scrapy looks for spider classes.
SPIDER_MODULES = ['firstvet.spiders']
# Module in which the `scrapy genspider` command creates new spiders.
NEWSPIDER_MODULE = 'firstvet.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'firstvet (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'firstvet.middlewares.FirstvetSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'firstvet.middlewares.FirstvetDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'firstvet.pipelines.FirstvetPipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import scrapy
class FirstvetspiderSpider(scrapy.Spider):
    """Spider that collects paragraph text from one FirstVet article page."""

    name = 'firstvetspider'
    allowed_domains = ['firstvet.com']
    start_urls = ['https://firstvet.com/us/articles/ringworm-in-dogs/']

    def parse(self, response):
        """Yield one item holding the text of every ``<p>`` on the page.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            A dict with a single ``'Info'`` key whose value is the list of
            text nodes found directly inside ``<p>`` elements.
        """
        info_ringworm = response.xpath('//p/text()').getall()
        # Report through the spider's logger rather than print() so the
        # message follows Scrapy's log level and output configuration.
        self.logger.debug(
            'Extracted %d paragraph text nodes from %s',
            len(info_ringworm),
            response.url,
        )
        yield {'Info': info_ringworm}
\ No newline at end of file
[
{"Info": ["Does your pet have ringworm? What can you do to help? Keep reading to learn about the different types and treatments of ringworm in cats and dogs.", "Book a video consultation with an experienced veterinarian within minutes.", "Ringworm is not a worm! Actually, it\u2019s an infection caused by a type of fungus. These are also called dermatophytes. A \u201cringworm\u201d infection is also known as \u201cdermatophytosis\u201d, and it can infect many animals including dogs, cats, and people. Several species of fungi can cause infection in the superficial layers of skin, and also hair and nails.", "The most common dermatophytes causing infection in dogs and cats:", "Ringworm tends to affect the young, old, and immunocompromised. A healthy adult animal may come into contact with these organisms without becoming infected by them. Most pets become infected through contact with other animals. It is not uncommon to see dermatophytosis in puppies and kittens, rescue and shelter pets or overcrowding situations, as well as hunting dogs or animals in warm environments. Animals that are under stress, malnourished, or harboring an underlying disease may be more likely to become infected. Interestingly, cats with FIV or Feline Leukemia are ", "more susceptible to dermatophytosis.", "Dermatophytosis is a zoonotic infection, meaning humans can become infected by contact with infected animals. The name \u2018ringworm\u2019 comes from its red, round appearance surrounded by a scaly ring (on human skin).", "A combination of moisture on the skin, fungal spores, and microtrauma to the superficial layers of the skin can cause a lesion. The severity of lesions is correlated with immune response. There are no \u201cmore virulent\u201d or \u201cless virulent\u201d strains, the infection is dictated by the host\u2019s immune system. 
Many things can cause micro-abrasions to the skin, such as grooming and bathing, fleas, and mites.", "Direct contact is the main mode of transmission of ringworm among dogs and in between dogs and other animals. Transmission often happens when a dog is in contact with the infected animal or any contaminated object like a carpet, food bowl, or bedding. Infected animals spread fungal spores into the environment when they shed off infected hair. Fungal spores can stay viable for up to 18 months.", "However, contact alone is not enough to cause an infection in dogs and humans. Host factors like immunity, age, health condition, nutrition, and grooming behavior can influence and affect the risk of infection even with direct contact with the fungal spores. Also, infected animals that have recovered can develop some degree of resistance against dermatophytes that protect them against reinfection for a short time.", "Dermatophytosis in dogs typically causes hair loss and itchiness. Lesions are often seen in bald patches, and the skin can become scaly and produce dandruff. The skin can also become darker and occasionally red from inflammation. Secondary bacterial infection may occur on the affected parts of the skin and pustular nodules may start to develop.", "The commonly affected parts of the dog\u2019s body are the feet, face, ears, and tail, as these are the ones that come in contact with various objects in the environment or other animals. The nails and nailbeds can also become infected, which can result in ", ", ", ", or brittle nails in dogs. Redness and darkly pigmented skin are often seen in dogs with ringworm infection on their nails and nailbeds.", "Dogs can become carriers of dermatophytes, and not show any signs even if they are carrying the fungal organism. Asymptomatic carriers can still transfer the infection to other animals and humans through contact.", "Diagnosis by your veterinarian may be immediate or take some time for testing through a laboratory. 
Tests for ringworm include:", "Treatment requires persistent and appropriate medication, time, monitoring, and patience. Or doing absolutely nothing at all!", "Since ringworm is primarily transmitted through direct contact, isolation of the infected animal and daily cleaning of the house and objects that might have come in contact with an infected animal can help prevent transmission and control the spread of ringworm. Supplements that help improve your dog\u2019s immune system and general health can help prevent infection even if there\u2019s contact with infectious spores.", "Recently, a vaccine is being studied that can offer protection against some species of dermatophytes that cause ringworm in dogs. Having your dog vaccinated can help offer protection but does not eliminate the risk entirely, since other fungal species can cause skin infection in dogs.", "Click ", " to schedule a video consult to speak to one of our vets. You can also download the FirstVet app from the Apple App Store and Google Play Stores.", "Crystals in your dog\u2019s urine (crystalluria) are formed when there is an excessive amount (oversaturation) of various min...", "\nRead full article\n", "An ectopic ureter is a congenital condition, which means that the anatomical defect is already present at birth. Affecte...", "\nRead full article\n", "In many ways, we share a lot of similarities with our canine buddies in terms of anatomy and physiology. Canines also su...", "\nRead full article\n", "Book a video consultation with an experienced veterinarian within minutes.", "Video call a licensed vet to get expert advice. Open 24 hours a day, 365 days a year."]}
]
\ No newline at end of file
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = firstvet.settings
[deploy]
#url = http://localhost:6800/
project = firstvet
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment