Super simple and basic scrapyd web interface

I use Scrapy to crawl site data for b-kam.com, and I found it silly to go to the console every time I wanted to start or stop a spider. So I took a couple of hours to write this simple HTML and JavaScript page for managing (starting / stopping) scrapyd spiders and jobs.

I think it should later be extended to use PHP & MySQL and store some details in a database.

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
	<head>
		<title>Simple scrapyd web manager</title>
		<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>
		<script type="text/javascript">
			// Base URL of the scrapyd service. Endpoint names are appended directly,
			// so keep the trailing slash, e.g. http://localhost:6800/
			var scrapyd_url = 'http://localhost:6800/';
			// Name of the scrapyd project whose spiders and jobs this page manages
			var project_name = 'bkam';

			// Spider-name lists. listjobs() pushes into running_spiders and
			// finished_spiders; nothing on this page writes to all_spiders.
			var all_spiders = [],
				running_spiders = [],
				finished_spiders = [];

			$(document).ready(function() {
				// Schedule a run of the spider named in the clicked link's "rel" attribute.
				// The handler is delegated from document so that "Start" links injected
				// later by the Ajax callbacks are covered; .on() (jQuery 1.7+) is the
				// supported replacement for the deprecated .live().
				$(document).on('click', '.start_spider', function(event) {
					// The link's href is "#"; keep the browser from jumping to the page top
					event.preventDefault();

					// Local variable: avoids creating an implicit global
					var spider_name = $(this).attr('rel');
					$.post(scrapyd_url + 'schedule.json', {
						project : project_name,
						spider : spider_name
					});
				});
				
				// Cancel the job whose id is stored in the clicked link's "rel" attribute.
				// The handler is delegated from document so that "Stop" links injected
				// later by the Ajax callbacks are covered; .on() (jQuery 1.7+) is the
				// supported replacement for the deprecated .live().
				$(document).on('click', '.stop_spider', function(event) {
					// The link's href is "#"; keep the browser from jumping to the page top
					event.preventDefault();

					// Local variable: avoids creating an implicit global
					var job_id = $(this).attr('rel');
					$.post(scrapyd_url + 'cancel.json', {
						project : project_name,
						job : job_id
					});
				});

				/**
				 * Fetch the project's jobs from scrapyd's listjobs.json and render them.
				 *
				 * Fills #finished_spiders with one entry per finished job (spider name
				 * plus a link to its log) and #running_spiders with one entry per running
				 * job (spider name, log link and a "Stop" link carrying the job id in
				 * "rel"). When nothing is running, a "No Running spiders" placeholder is
				 * shown instead. The module-level running_spiders / finished_spiders
				 * arrays are refreshed with the spider names seen in the response.
				 *
				 * Both lists and both arrays are reset first, so the function can be
				 * called again without duplicating entries.
				 */
				function listjobs() {
					// Markup for the link to one job's log file
					function log_link(job) {
						return '<a href="' + scrapyd_url + 'logs/' + project_name + '/' + job.spider + '/' + job.id + '.log">log</a>';
					}

					// Passing the project as a data object lets jQuery URL-encode it
					$.getJSON(scrapyd_url + 'listjobs.json', { project : project_name }, function(data) {
						// A scrapyd error payload carries no job lists; fall back to empty
						// arrays so $.each / .length do not throw on undefined
						var finished = (data && data.finished) || [];
						var running = (data && data.running) || [];

						// Drop results of any previous call
						finished_spiders.length = 0;
						running_spiders.length = 0;

						var finished_items = [];
						$.each(finished, function(key, val) {
							finished_spiders.push(val.spider);
							finished_items.push('<li>' + val.spider + '&nbsp;&nbsp;' + log_link(val) + '</li>');
						});
						$('#finished_spiders').empty().append(finished_items.join(''));

						var running_list = $('#running_spiders').empty();
						if (running.length > 0) {
							var running_items = [];
							$.each(running, function(key, val) {
								running_spiders.push(val.spider);
								running_items.push('<li>' + val.spider + '&nbsp;&nbsp;' + log_link(val) + '&nbsp;&nbsp;|&nbsp;&nbsp;<a href="#" class="stop_spider" rel="' + val.id + '">Stop</a></li>');
							});
							running_list.append(running_items.join(''));
						} else {
							running_list.append('<li>No Running spiders</li>');
						}
					});
				}

				// Initial render of the running / finished job lists
				listjobs();

				// Fetch the project's spiders and list each one with a "Start" link whose
				// "rel" attribute carries the spider name (read by the .start_spider handler).
				// Passing the project as a data object lets jQuery URL-encode it.
				$.getJSON(scrapyd_url + 'listspiders.json', { project : project_name }, function(data) {
					// A scrapyd error payload carries no "spiders" key; fall back to an
					// empty list so $.each does not throw on undefined
					var spiders = (data && data.spiders) || [];
					var items = [];
					$.each(spiders, function(key, val) {
						items.push('<li id="' + val + '">' + val + '&nbsp;&nbsp;<a href="#" class="start_spider" rel="'+ val + '">Start</a></li>');
					});

					$('#available_spiders').append(items.join(''));
				});

			});
		</script>
	</head>
	<body>
		<!-- reload() has to be invoked; a bare reference to the function does nothing.
		     A button is used because the control performs an action, not a navigation. -->
		<button type="button" onclick="window.location.reload()">refresh</button>
		<br />
		List of running spiders
		<ul id="running_spiders"></ul>
		List of finished spiders (First is latest)
		<ul id="finished_spiders"></ul>
		List of available spiders
		<ul id="available_spiders"></ul>
	</body>
</html>

1 Comment

  • Thanks for such great work; it's rare to find a scrapyd example on the internet. I am trying to do the same thing in ASP.NET but am having a problem passing the project name and spider name. How do we pass these parameters? Any idea how to do it in C#?

    thanks

Leave a comment

 

WP-SpamFree by Pole Position Marketing