/* grabimage.c */

/* grabimage.c -- Usage:  "grabimage www.somewhere.com/path/picture.jpg" 
	This demonstrates the use of the socket API in a *nix environment.
	NOTES are at the end */

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* A simple struct for returning two results from process_arg() at once:
   where to connect, and what to ask for once connected. */
typedef struct {
	struct in_addr addr;	/* IPv4 address of the host, as copied out of the hostent */
	char *path;		/* image path without its leading '/'; points into the caller's argument string */
} webpic;

/* Forward declarations; the definitions follow main(). */

/* Read one '\n'-terminated line from sock into buf (the '\n' is dropped). */
void line(int sock, char *buf); 
/* Split "host/path" in place and resolve the host part to an address. */
webpic process_arg(char *arg);
/* Read exactly len bytes from sock into buf; exits the program on recv error. */
void recieve(int sock, void *buf, int len); 
/* Send the whole NUL-terminated string msg on sock; exits the program on send error. */
void transmit(int sock, char *msg); 

/*
 * Fetch one image over HTTP/1.0 and save it in the current directory.
 *
 * argv[1] has the form "host/path/picture.jpg".  The host is resolved by
 * process_arg(), a GET request is sent to port 80, the response header is
 * scanned for Content-Length, and exactly that many body bytes are written
 * to a file named after the last component of the path.
 *
 * Returns 0 on success, or a negative code:
 *   -1  missing argument or request too long for the buffer
 *   -3  socket() or connect() failed
 *   -6  the response carried no usable Content-Length
 *   -7  out of memory
 *   -8  the output file could not be named, opened or written
 * (process_arg(), transmit() and recieve() exit the program themselves
 * on their own failures.)
 */
int main (int argc, char *argv[]) {
	int sock, n, ilen = -1, rc = 0;
	long length;
	webpic image;
	struct sockaddr_in connto;
	char message[1024], buffer[4096]={0}, *ptr, *end;
	void *img_data = NULL;
	FILE *out;

	if (argc < 2) { puts("Grab what image?"); return -1;}

	image = process_arg(argv[1]);
	memset(&connto,0,sizeof connto);
	connto.sin_family=AF_INET;
	connto.sin_port=htons(80);
	connto.sin_addr.s_addr=image.addr.s_addr;

	/* now we have the details, open a socket and connect */
	printf("Connecting to %s...",inet_ntoa(image.addr));
	if ((sock=socket(PF_INET,SOCK_STREAM,0))==-1) {
		/* without a socket, connect() could only fail too: stop here */
		perror("socket");
		return -3;
	}
	if (connect(sock,(struct sockaddr*)&connto,sizeof connto)==-1) {
		perror("!connect");
		close(sock);
		return -3;
	}
	printf("success:\n\n");

	/* ask for the image (NOTE 2).  process_arg() cut argv[1] at its first
	   '/', so argv[1] now holds only the host name; it is sent as the Host
	   header, which name-based virtual hosts need to pick the right site. */
	n = snprintf(message,sizeof message,"GET /%s HTTP/1.0\r\nHost: %s\r\n\r\n",
	             image.path,argv[1]);
	if (n < 0 || (size_t)n >= sizeof message) {
		fprintf(stderr,"Request for /%s does not fit in %d bytes\n",
		        image.path,(int)sizeof message);
		rc = -1;
		goto done;
	}
	transmit(sock,message);

	/* now read the header sent back (NOTE 3).  The first byte is cleared
	   before every call so that a line() which stored nothing (connection
	   closed, or a bare "\n" blank line) ends the loop just like the usual
	   "\r\n" blank line, instead of re-testing the previous line forever. */
	for (;;) {
		buffer[0] = '\0';
		line(sock,buffer);
		if (buffer[0]=='\r' || buffer[0]=='\0') break;
		buffer[strcspn(buffer,"\r")] = '\0';	/* drop the CR left before the '\n' */
		if (strncasecmp(buffer,"Content-Length:",15)==0) {
			/* strtol skips optional blanks after the colon and lets us
			   reject junk, negative and out-of-range values */
			errno = 0;
			length = strtol(buffer+15,&end,10);
			if (end!=buffer+15 && errno==0 && length>=0 && length<=INT_MAX)
				ilen = (int)length;
		}
		printf(" %s\n", buffer);
	}

	if (ilen < 0) {
		fputs("No usable Content-Length in the response; cannot size the download\n",stderr);
		rc = -6;
		goto done;
	}

	/* get the image (malloc(0) may legally return NULL, so ask for >= 1) */
	img_data = malloc(ilen > 0 ? (size_t)ilen : 1);
	if (img_data == NULL) {
		perror("malloc");
		rc = -7;
		goto done;
	}
	recieve(sock,img_data,ilen);

	/* write the image to a file named after the last path component */
	if ((ptr = strrchr(image.path,'/'))==NULL) ptr=image.path;
	else ptr++;
	if (*ptr == '\0') {
		fputs("Image path has no file name to save to\n",stderr);
		rc = -8;
		goto done;
	}
	if ((out = fopen(ptr,"wb"))==NULL) {
		perror(ptr);
		rc = -8;
		goto done;
	}
	if (fwrite(img_data,1,(size_t)ilen,out) != (size_t)ilen) {
		perror(ptr);
		rc = -8;
	}
	/* fclose flushes, so a failure here also means the file is incomplete */
	if (fclose(out) != 0 && rc == 0) {
		perror(ptr);
		rc = -8;
	}
	if (rc == 0) printf("Image copied to %s\n\n",ptr);

done:
	free(img_data);
	close(sock);
	return rc;
}

/*
 * Read one line from sock into buf, one byte at a time.
 *
 * Reading stops at the first '\n', which is not stored; a preceding '\r'
 * is kept.  It also stops when the peer closes the connection or recv()
 * fails.  In every case buf is left NUL-terminated, so an empty string
 * means nothing was read before the line or the connection ended.
 *
 * buf must have room for LINE_CAPACITY bytes (main() passes a 4096-byte
 * buffer).  Characters beyond LINE_CAPACITY-1 are read and discarded so an
 * over-long line cannot overrun the buffer.
 */
void line(int sock, char *buf) {
	enum { LINE_CAPACITY = 4096 };
	char ch;
	int i=0;
	/* recv() yields 1 for a byte, 0 at end of stream and -1 on error;
	   only the first of those means ch holds data */
	while (recv(sock,&ch,1,0)==1) {
		if (ch=='\n') break;
		if (i < LINE_CAPACITY-1) buf[i++]=ch;
	}
	buf[i] = '\0';
}
		
	
/*
 * Read exactly len bytes from sock into buf, looping because a single
 * recv() may deliver fewer bytes than requested.
 *
 * Does not return on failure: a recv() error, or the peer closing the
 * connection before len bytes arrived, prints a message and exits the
 * program with status -5.  A len of 0 or less reads nothing.
 */
void recieve(int sock, void *buf, int len) {
	char *dest = buf;	/* byte pointer: arithmetic on void * is not ISO C */
	int done = 0, todo = len;
	while (todo>0) {
		if ((done=recv(sock,dest,todo,0))==-1) {
			perror("recv");
			exit (-5);
		}
		if (done==0) {
			/* orderly shutdown by the peer: without this check the
			   loop would spin forever, since todo never shrinks */
			fprintf(stderr,"recv: connection closed with %d of %d bytes still missing\n",
			        todo,len);
			exit (-5);
		}
		todo -= done;
		dest += done;
	}
}
	

/*
 * Send the whole NUL-terminated string msg on sock (the terminator itself
 * is not sent).  send() may accept only part of the data, so keep sending
 * the unsent tail until nothing is left.  On a send() error the program
 * prints the reason and exits with status -4.
 */
void transmit(int sock, char *msg) {
	int remaining = strlen(msg);
	char *cursor = msg;

	while (remaining > 0) {
		int sent = send(sock, cursor, remaining, 0);
		if (sent == -1) {
			perror("send");
			exit (-4);
		}
		cursor += sent;
		remaining -= sent;
	}
}


/*
 * Split a "host/path/picture" argument and resolve the host.
 *
 * arg is modified in place: its first '/' is overwritten with a NUL, so on
 * return arg holds just the host name and the returned path points at the
 * text that followed the slash (still inside arg's storage, so it must
 * outlive the result).  The returned addr is the first address that
 * gethostbyname() reports for the host.
 *
 * Does not return on failure: exits with -1 when arg contains no '/', and
 * with -2 when the host cannot be resolved to an IPv4 address.
 */
webpic process_arg(char *arg) {
	webpic data;
	struct hostent *info;
	struct in_addr *ptr;
	
	/* separate image path from hostname */
	data.path = strchr(arg,'/');
	if (data.path == NULL) {
		/* a bare host name has no image to fetch; strchr's NULL must
		   not be written through */
		fprintf(stderr,"No image path in \"%s\" (expected host/path/picture)\n",arg);
		exit (-1);
	}
	data.path[0] = 0;
	data.path++;
	/* get host address using the host name */
	if ((info=gethostbyname(arg))==NULL) {
		printf("gethostbyname fail: %d\n",h_errno);
		exit (-2);
	}
	/* only a non-empty list of IPv4 addresses fits in a struct in_addr */
	if (info->h_addrtype != AF_INET || info->h_addr_list[0] == NULL) {
		fprintf(stderr,"No IPv4 address found for %s\n",arg);
		exit (-2);
	}
	/* copy this address (NOTE 1).  h_addr_list[0] is spelled out because
	   h_addr is only a backward-compatibility macro for it, which some C
	   libraries hide when compiling in strict ISO C mode. */
	ptr = (struct in_addr*)(info->h_addr_list[0]);
	data.addr.s_addr = ptr->s_addr;
	return data;	
}

/*  NOTES

1) gethostbyname() from netdb.h returns a struct hostent by DNS lookup.  
   This includes a list of addresses for the host, "char **h_addr_list".  
   h_addr is #defined as h_addr_list[0], the first (possibly only) address in the list.  
   To copy this address to an unsigned 32-bit integer (the content of a struct in_addr), 
   we need the intermediary *ptr in process_arg().

2) Since send and recv are not guaranteed to complete, we use a couple of simple functions 
   ("transmit()" and "recieve()") to loop until a specific number of bytes are completed.  
   line() also loops, one character at a time until the newline, so we can read the text HTTP header.

3) In RFC 2616 (Hypertext Transfer Protocol -- HTTP/1.1) it says that an HTTP header shall contain 
   "an empty line (i.e., a line with nothing preceding the CRLF) indicating the end of the header fields", 
   so we are looking for a line beginning with a Carriage Return.  
   Of course, we do not check the start-line, which should be "HTTP/1.1 200 OK", but that seems like a 
   reasonably safe assumption here.  There is a chance that no "content-length" will be given, which will 
   be noticeable in the output and cause execution to fail.
   Also beware that strncasecmp is a POSIX function (declared in <strings.h>), not part of ISO C, 
   so it may not be available everywhere.

*/