AmigaEd wrote:
Hello,
I'm trying to learn C and I am wondering if someone out there might have a very simple example of some C source code that will grab a web page and display it or even just save it as a file.
I've looked at a few programs on aminet, but I can't seem to make sense out of them.
Thank you,
AmigaEd
#include <sys/types.h>
#include <sys/socket.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * First attempt at a minimal HTTP fetch: connect to a fixed IPv4 address on
 * port 80, send one request string, perform a single recv() and print the
 * result.
 *
 * The code is kept exactly as posted because the rest of the thread
 * discusses its problems:
 *  - the request line has the path and protocol version in the wrong order
 *    and is terminated with "\r\r" instead of "\r\n\r\n";
 *  - none of malloc(), socket(), connect(), send() or recv() is checked;
 *  - only the named members of socket_detials are assigned, the rest of the
 *    structure is left uninitialised;
 *  - input_buffer is printed with %s although nothing NUL-terminates it;
 *  - neither the buffer nor the socket is released.
 */
int main ( void ) {
int socket_handle ;
struct sockaddr_in socket_detials ;
char * input_buffer;
/* Malformed request: a valid one looks like "GET / HTTP/1.0\r\n\r\n". */
char * httpget = "GET HTTP 1.1 / \r\r" ;
input_buffer = malloc(20000);
socket_handle = socket ( AF_INET, SOCK_STREAM, 0) ;
socket_detials.sin_family = AF_INET ;
/* Hard-coded dotted-quad address; no name lookup is done. */
socket_detials.sin_addr.s_addr=inet_addr("68.90.68.66");
socket_detials.sin_port = htons(80);
connect (socket_handle,(struct sockaddr*)&socket_detials, sizeof ( struct sockaddr));
send ( socket_handle , httpget, strlen(httpget), 0 ) ;
/* A single recv() returns only what has arrived so far, at most 20000 bytes. */
recv ( socket_handle , input_buffer , 20000, 0 ) ;
printf ( "%s\n", input_buffer ) ;
return 0 ;
}
by koaftder on 2006/1/20 22:05:10
This isn't too bad actually (http://www.google.com/search?hl=en&q=software+hut&btnG=Google+Search). Learn your TCP library. All you have to do is issue one simple string, something like "GET HTTP 1.0 /".
AmigaEd wrote:
Quote by koaftder on 2006/1/20 22:05:10:
This isn't too bad actually (http://www.google.com/search?hl=en&q=software+hut&btnG=Google+Search). Learn your TCP library. All you have to do is issue one simple string, something like "GET HTTP 1.0 /".
Hi koaftder,
The link you posted seems to take me to a bunch of links to software hut on google.
Can you please post the link again or point me to the correct site.
Thank you,
AmigaEd
koaftder wrote:
you may want to check out the simple socket library from http://mysite.verizon.net/astronaut/ssl/
It supports a lot of OSes and I seem to remember it saying it supported Amiga... Socket programming with training wheels.
GET / HTTP/1.0\r\n
Host: amiga.org\r\n\r\n
Without supplying the "Host: amiga.org" line, the web server won't know which site you are asking for and will return some default page - try entering the IP address (http://68.90.68.66/) for amiga.org in a browser and see what happens then.
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Second attempt: same fixed-address fetch, now clearing sin_zero, reporting
 * a failed connect() and printing the byte counts returned by send() and
 * recv().
 *
 * Kept exactly as posted; the transcript that follows ("Sending 21 bytes",
 * "400 Bad Request") is the output of this exact code. Remaining problems:
 *  - the request line still has path and version swapped;
 *  - execution continues after a failed connect();
 *  - malloc() and socket() are unchecked;
 *  - the results of send()/recv() are printed with %d;
 *  - input_buffer is still printed with %s without a terminating NUL.
 */
int main ( void ) {
int socket_handle ;
struct sockaddr_in socket_detials ;
char * input_buffer;
/* 15 request characters followed by CR LF LF twice: 21 bytes in total. */
char * httpget = "GET HTTP 1.1 / \x0D\x0A\n\x0D\x0A\n" ;
input_buffer = malloc(20000);
socket_handle = socket ( AF_INET, SOCK_STREAM, 0) ;
socket_detials.sin_family = AF_INET ;
socket_detials.sin_addr.s_addr=inet_addr("68.90.68.66");
socket_detials.sin_port = htons(80);
/* Hard-codes the padding size as 8 bytes. */
bzero ( &(socket_detials.sin_zero), 8 ) ;
if ( connect (socket_handle,(struct sockaddr*)&socket_detials, sizeof ( struct sockaddr)) == -1 ){
/* Only reports the failure; the send()/recv() below still run. */
printf ( "Couldnt connect to server\n" ) ;
}
printf ( "Sending %d bytes\n", send ( socket_handle , httpget, strlen(httpget), 0 ) ) ;
printf ( "Received %d bytes\n", recv ( socket_handle , input_buffer , 20000, 0 ) ) ;
printf ( "%s\n", input_buffer ) ;
return 0 ;
}
koft@macdev:~$ ./socket
Sending 21 bytes
Received 658 bytes
HTTP/1.1 400 Bad Request
Date: Sat, 21 Jan 2006 04:37:48 GMT
Server: Apache/1.3.34 (Unix) mod_auth_passthrough/1.8 mod_log_bytes/1.2 mod_bwlimited/1.4 PHP/4.4.1 FrontPage/5.0.2.2635 mod_ssl/2.8.25 OpenSSL/0.9.7a
Connection: close
Content-Type: text/html; charset=iso-8859-1
400 Bad Request
Bad Request
Your browser sent a request that this server could not understand.
The request line contained invalid characters following the protocol string.
Apache/1.3.34 Server at cpanel1.betterbox.net Port 80
koft@macdev:~$
patrik wrote:
@koaftder:
The reason you are getting a "400 Bad Request" response is that you are specifying that your client is an HTTP/1.1 client, which requires you to supply the "Host: something.com" header line. That line is optional in HTTP/1.0, but it is required for virtual hosts to work, so it is definitely recommended to supply it anyhow.
With a simple client, there is no advantage in telling the server that your client supports HTTP/1.1 instead of HTTP/1.0, rather disadvantages as then the server is allowed to send you dynamic pages as chunks using the so called "chunked transfer-coding".
/Patrik
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Third attempt: sends a well-formed HTTP/1.1 request, with a Host header
 * and a set of browser-style headers, to the same fixed address, then does a
 * single recv() and prints what came back.
 *
 * Kept exactly as posted; the transcript that follows is the output of this
 * exact code. Remaining problems:
 *  - a single recv() returns only the first part of the reply;
 *  - malloc() and socket() are unchecked and a failed connect() is only
 *    reported;
 *  - the results of send()/recv() are printed with %d;
 *  - input_buffer is printed with %s without a terminating NUL;
 *  - the Cookie header carries a fixed session id.
 */
int main ( void ) {
int socket_handle ;
struct sockaddr_in socket_detials ;
char * input_buffer;
/* Adjacent string literals are concatenated into one request; the final
   empty "\r\n" line ends the header section. */
char * httpget =
"GET / HTTP/1.1\r\n"
"Host: www.amiga.org\r\n"
"User-Agent: Mozilla/5.0 (X11; U; Linux ppc; en-US; rv:1.7.10) Gecko/20050825 Firefox/1.0.6 (Ubuntu package 1.0.6)\r\n"
"Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n"
"Accept-Language: en-us,en;q=0.5\r\n"
"Accept-Encoding: gzip,deflate\r\n"
"Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n"
"Keep-Alive: 300\r\n"
"Connection: keep-alive\r\n"
"Referer: http://www.amiga.org/gallery/index.php?n=896=33\r\n"
"Cookie: PHPSESSID=442105507b7dca6d4042a641fc132c8f; AO_Session=442105507b7dca6d4042a641fc132c8f\r\n"
"Cache-Control: max-age=0\r\n"
"\r\n";
input_buffer = malloc(20000);
socket_handle = socket ( AF_INET, SOCK_STREAM, 0) ;
socket_detials.sin_family = AF_INET ;
socket_detials.sin_addr.s_addr=inet_addr("68.90.68.66");
socket_detials.sin_port = htons(80);
/* Hard-codes the padding size as 8 bytes. */
bzero ( &(socket_detials.sin_zero), 8 ) ;
if ( connect (socket_handle,(struct sockaddr*)&socket_detials, sizeof ( struct sockaddr)) == -1 ){
/* Only reports the failure; the send()/recv() below still run. */
printf ( "Couldnt connect to server\n" ) ;
}
printf ( "Sending %d bytes\n", send ( socket_handle , httpget, strlen(httpget), 0 ) ) ;
printf ( "Received %d bytes\n", recv ( socket_handle , input_buffer , 20000, 0 ) ) ;
printf ( "%s\n", input_buffer ) ;
return 0 ;
}
koft@macdev:~$ ./socket
Sending 612 bytes
Received 1460 bytes
HTTP/1.1 200 OK
Date: Sat, 21 Jan 2006 05:03:13 GMT
Server: Apache/1.3.34 (Unix) mod_auth_passthrough/1.8 mod_log_bytes/1.2 mod_bwlimited/1.4 PHP/4.4.1 FrontPage/5.0.2.2635 mod_ssl/2.8.25 OpenSSL/0.9.7a
X-Powered-By: PHP/4.4.1
Set-Cookie: PHPSESSID=442105507b7dca6d4042a641fc132c8f; path=/
Expires: Mon, 26 Jul 1997 05:00:00 GMT
Cache-Control: private, no-cache
Pragma: no-cache
Set-Cookie: AO_Session=442105507b7dca6d4042a641fc132c8f; expires=Saturday, 28-Jan-06 05:03:14 GMT; path=/
Keep-Alive: timeout=15, max=100
Connection: Keep-Alive
Transfer-Encoding: chunked
Content-Type: text/html; charset=ISO-8859-1
d19
koft@macdev:~$
by ChaosLord on 2006/1/20 23:32:26
The word "Amiga" does not exist on that site.
;/*
gcc -noixemul -Wall -O2 httpget.c -o httpget
quit
*/
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#include <proto/exec.h>
#ifdef __SASC
#include <proto/socket.h>
#endif
#include <clib/alib_protos.h>
#define RECV_BUFSIZE 16384
/* Forward declarations of the functions defined further down in this file. */
struct MinList *http_get(const char *host, int port, const char *path);
struct MinList *dorecv(int s);
void dumplist(struct MinList *list);
void freelist(struct MinList *list);
/*
 * One chunk of received data, kept on a struct MinList.
 * dorecv() allocates each node with room for the payload directly behind the
 * structure, so the data bytes start at (node + 1).
 */
struct datanode
{
/* List linkage; must stay first because nodes are cast to struct Node *. */
struct MinNode node;
/* Number of payload bytes stored after this structure. */
int len;
};
/*
 * Fetch the front page of www.amiga.org and dump the raw reply (headers
 * included) to standard output.
 *
 * Returns 0 on success and EXIT_FAILURE when the fetch failed, so that
 * scripts calling the program can detect the error.
 */
int main(void)
{
    struct MinList *res;

    res = http_get("www.amiga.org", 80, "/");
    if (!res)
    {
        fprintf(stderr, "http_get failed\n");
        return EXIT_FAILURE;
    }

    dumplist(res);
    freelist(res);
    return 0;
}
/*
FUNCTION
http_get - HTTP GET a location off a web server
struct MinList *http_get(const char *host, int port, const char *path)
INPUT
The http-request must be split into valid components for this function.
host: hostname, or an IPv4 address in dotted-quad notation (used as a
fallback when the name lookup fails). Must not be NULL or empty.
port: port number, 1..65535
path: the path of object to http get. spaces and special chars should
be encoded to %<hex>. An empty string is treated as "/".
RESULT
struct MinList *
NULL if error, else the list returned by dorecv(), filled with
'struct datanode' nodes. Note that the output includes the full header
returned by the server, and it's left for the caller to parse it
(separate header and actual data). The caller releases the list with
freelist().
NOTE
This function blocks, and it can potentially take hours to complete;
for example if the file is long, or the connection is very slow.
Only IPv4 is supported; a lookup that yields any other address family
is rejected.
*/
struct MinList *http_get(const char *host, int port, const char *path)
{
    const char *fmt =
        "GET %s HTTP/1.0\r\n"
        "Host: %s\r\n"
        "User-Agent: httpget_test_app/1.0\r\n"
        "\r\n";
    struct MinList *list = NULL;
    struct sockaddr_in saddr;
    struct hostent *he;
    char *req;
    int reqlen;
    int sent;
    int s;

    if (!host || !host[0] || port <= 0 || port > 65535 || !path)
    {
        return NULL;
    }
    if (path[0] == '\0')
    {
        path = "/";
    }

    bzero(&saddr, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(port);

    he = gethostbyname(host);
    if (he)
    {
        /* The address is copied into an IPv4 sockaddr and the socket is
           AF_INET, so anything else would overflow or never connect. */
        if (he->h_addrtype != AF_INET ||
            he->h_length != (int) sizeof(saddr.sin_addr))
        {
            return NULL;
        }
        memcpy(&saddr.sin_addr, he->h_addr_list[0], sizeof(saddr.sin_addr));
    }
    else
    {
        /* Not resolvable by name: try it as a dotted-quad address. */
        saddr.sin_addr.s_addr = inet_addr(host);
    }
    if (saddr.sin_addr.s_addr == INADDR_NONE)
    {
        return NULL;
    }

    s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (s == -1)
    {
        return NULL;
    }

    if (connect(s, (struct sockaddr *) &saddr, sizeof(saddr)) != -1)
    {
        /* Each "%s" in fmt (2 chars) is replaced by its argument, +1 for
           the terminating '\0'. */
        req = malloc(strlen(fmt) - 4 + strlen(path) + strlen(host) + 1);
        if (req)
        {
            sprintf(req, fmt, path, host);
            reqlen = strlen(req);

            /* send() may accept only part of the request; keep going until
               everything is out or an error occurs. */
            sent = 0;
            while (sent < reqlen)
            {
                int n = send(s, req + sent, reqlen - sent, 0);
                if (n <= 0)
                {
                    break;
                }
                sent += n;
            }
            if (sent == reqlen)
            {
                list = dorecv(s);
            }
            free(req);
        }
    }

    /* Close the socket on every path, including a failed connect(). */
    close(s);
    return list;
}
/*
 * Read from socket s until the peer closes the connection and return the
 * data as a list of 'struct datanode' chunks, one per successful recv().
 *
 * Returns NULL when the scratch buffer cannot be allocated, when recv()
 * reports an error, when a list or node allocation fails, or when the
 * connection ends before any data arrived (no list is created in that case).
 * On failure every chunk collected so far is released again.
 */
struct MinList *dorecv(int s)
{
    struct MinList *chunks = NULL;
    struct datanode *chunk;
    UBYTE *scratch;
    int complete = 0;
    int got;

    scratch = malloc(RECV_BUFSIZE);
    if (!scratch)
    {
        return NULL;
    }

    while (!complete)
    {
        got = recv(s, scratch, RECV_BUFSIZE, 0);
        if (got == -1)
        {
            /* receive error */
            break;
        }
        if (got == 0)
        {
            /* end of stream: everything has been collected */
            complete = 1;
            continue;
        }

        /* The list header is created lazily, on the first chunk. */
        if (!chunks)
        {
            chunks = malloc(sizeof(*chunks));
            if (!chunks)
            {
                break;
            }
            NewList((struct List *) chunks);
        }

        /* Node header and payload share one allocation. */
        chunk = malloc(sizeof(*chunk) + got);
        if (!chunk)
        {
            break;
        }
        chunk->len = got;
        memcpy(chunk + 1, scratch, got);
        AddTail((struct List *) chunks, (struct Node *) chunk);
    }

    if (!complete)
    {
        freelist(chunks);
        chunks = NULL;
    }
    free(scratch);
    return chunks;
}
void dumplist(struct MinList *list)
{
if (list)
{
struct datanode *node;
fflush(stdout);
for (node = (APTR) list->mlh_Head;
node->node.mln_Succ;
node = (APTR) node->node.mln_Succ)
{
write(STDOUT_FILENO, node + 1, node->len);
}
fflush(stdout);
}
}
void freelist(struct MinList *list)
{
if (list)
{
struct datanode *node, *nextnode;
for (node = (APTR) list->mlh_Head;
(nextnode = (APTR) node->node.mln_Succ);
node = nextnode)
{
free(node);
}
free(list);
}
}
Piru wrote:
@koaftder
Bugs:
- You don't check if malloc() fails, but just crash if it does.
- You don't check if socket() fails, but just continue instead.
- bzero ( &(socket_detials.sin_zero), 8 ) ; is wrong. It assumes knowledge of the layout of struct sockaddr_in, which can differ between platforms. Typically the size is 8, but there is no guarantee of this.
- You don't bail out if connect() fails, but just continue.
- You don't check if send() succeeds.
- You don't check how much data you manage to recv().
- You limit the recv size to 20000 bytes. If more data would be available you just truncate input.
- There is no guarantee single recv() will get all the input at once. You might get just the header for the 1st call, or part of the header. You should call recv() till -1 (error) or 0 (eof) is returned.
- You printf %s the input buffer, even though it is not '\0' terminated.
- Sending fixed cookies will not work. Esp PHPSESSID will just fail once the session id has expired.
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Fetch "/" from www.amiga.org (fixed IPv4 address, port 80) with an
 * HTTP/1.0 request and copy the raw reply, headers included, to stdout.
 *
 * Returns 0 on success and -1 on any failure. The receive buffer and the
 * socket are released on every path.
 */
int main ( void ) {
    enum { INPUT_BUFFER_SIZE = 1024 };
    const char *httpget =
        "GET / HTTP/1.0\r\n"
        "Host: www.amiga.org\r\n"
        "\r\n";
    const char *phttpget = httpget;
    size_t bytes_left = strlen(httpget);
    struct sockaddr_in socket_detials;
    char *input_buffer;
    ssize_t bytes_sent;
    ssize_t bytes_received;
    int socket_handle = -1;
    int status = -1;

    input_buffer = malloc(INPUT_BUFFER_SIZE);
    if (input_buffer == NULL) {
        printf("Sorry, couldnt allocate memory for input buffer\n");
        goto done;
    }

    memset(&socket_detials, 0, sizeof(socket_detials));
    socket_detials.sin_family = AF_INET;
    socket_detials.sin_addr.s_addr = inet_addr("68.90.68.66");
    socket_detials.sin_port = htons(80);

    socket_handle = socket(AF_INET, SOCK_STREAM, 0);
    if (socket_handle == -1) {
        printf("Could not create socket\n");
        goto done;
    }

    if (connect(socket_handle, (struct sockaddr *) &socket_detials,
                sizeof(socket_detials)) == -1) {
        printf("Couldnt connect to server\n");
        goto done;
    }

    printf("Attempting to send %d bytes to server\n", (int) bytes_left);

    /* send() may take only part of the request; advance past what was
       accepted and stop as soon as nothing is left. */
    while (bytes_left > 0) {
        bytes_sent = send(socket_handle, phttpget, bytes_left, 0);
        if (bytes_sent <= 0) {
            printf("An error occured sending data\n");
            goto done;
        }
        phttpget += bytes_sent;
        bytes_left -= (size_t) bytes_sent;
    }

    /* The reply arrives in pieces; read until the server closes the
       connection. fwrite() is used so that embedded NUL bytes do not
       truncate the output. */
    for (;;) {
        bytes_received = recv(socket_handle, input_buffer,
                              INPUT_BUFFER_SIZE, 0);
        if (bytes_received == -1) {
            printf("An error occured during the receive procedure \n");
            goto done;
        }
        if (bytes_received == 0) {
            break;
        }
        fwrite(input_buffer, 1, (size_t) bytes_received, stdout);
    }

    printf("\nFinished receiving data\n");
    status = 0;

done:
    if (socket_handle != -1) {
        close(socket_handle);
    }
    free(input_buffer);
    return status;
}
Haha, I'm surprised you took the time to list all the irresponsible things I did. Since you took the time to review it, I'll take the time to fix it.
#include <exec/libraries.h> //added
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h> //added
/* Base pointer of bsdsocket.library; opened in main(), closed by bailout(). */
struct Library *SocketBase = NULL;
int bailout();

/*
 * AmigaOS variant of the simple fetch: open bsdsocket.library, request "/"
 * from www.amiga.org (fixed IPv4 address, port 80) with HTTP/1.0 and copy
 * the raw reply, headers included, to stdout.
 *
 * Returns 0 on success, 10 when bsdsocket.library cannot be opened and -1 on
 * any other failure. The receive buffer is freed and the library is closed
 * on every path.
 *
 * NOTE(review): OpenLibrary()/CloseLibrary() are called without a visible
 * prototype (e.g. from <proto/exec.h>), and the socket itself is never
 * closed explicitly -- confirm against the SDK in use.
 */
int main ( void )
{
    char *httpget =
        "GET / HTTP/1.0\r\n"
        "Host: www.amiga.org\r\n"
        "\r\n";
    char *phttpget = httpget;
    size_t bytes_left = strlen ( httpget );
    struct sockaddr_in socket_detials;
    char *input_buffer;
    ssize_t bytes_sent;
    ssize_t bytes_received;
    int socket_handle;
    int status = -1;

    input_buffer = malloc ( 1024 );
    if ( input_buffer == NULL )
    {
        printf ( "Sorry, couldnt allocate memory for input buffer\n" );
        return bailout ();
    }

    SocketBase = OpenLibrary ( "bsdsocket.library", 2 );
    if ( !SocketBase )
    {
        printf ( "Unable to open bsdsocket.library\n" );
        free ( input_buffer );
        return 10;
    }

    memset ( &socket_detials, 0, sizeof ( socket_detials ) );
    socket_detials.sin_family = AF_INET;
    socket_detials.sin_addr.s_addr = inet_addr ( "68.90.68.66" );
    socket_detials.sin_port = htons ( 80 );

    socket_handle = socket ( AF_INET, SOCK_STREAM, 0 );
    if ( socket_handle == -1 )
    {
        printf ( "Could not create socket\n" );
        goto done;
    }

    if ( connect ( socket_handle, (struct sockaddr *) &socket_detials,
                   sizeof ( socket_detials ) ) == -1 )
    {
        printf ( "Couldnt connect to server\n" );
        goto done;
    }

    printf ( "Attempting to send %d bytes to server\n", (int) bytes_left );

    /* send() may take only part of the request; advance past what was
       accepted and stop as soon as nothing is left. */
    while ( bytes_left > 0 )
    {
        bytes_sent = send ( socket_handle, phttpget, bytes_left, 0 );
        if ( bytes_sent <= 0 )
        {
            printf ( "An error occured sending data\n" );
            goto done;
        }
        phttpget += bytes_sent;
        bytes_left -= (size_t) bytes_sent;
    }

    /* The reply arrives in pieces; read until the server closes the
       connection. fwrite() is used so that embedded NUL bytes do not
       truncate the output. */
    for (;;)
    {
        bytes_received = recv ( socket_handle, input_buffer, 1024, 0 );
        if ( bytes_received == -1 )
        {
            printf ( "An error occured during the receive procedure \n" );
            goto done;
        }
        if ( bytes_received == 0 )
        {
            break;
        }
        fwrite ( input_buffer, 1, (size_t) bytes_received, stdout );
    }

    printf ( "\nFinished receiving data\n" );
    status = 0;

done:
    free ( input_buffer );
    /* bailout() closes bsdsocket.library; its -1 result is only used for
       the failure paths, which already carry status == -1. */
    bailout ();
    return status;
}
/*
 * Common failure exit: close bsdsocket.library if it is open, reset
 * SocketBase and return -1 so callers can write "return bailout();".
 */
int bailout(void)
{
    if (SocketBase)
    {
        CloseLibrary(SocketBase);
        SocketBase = NULL;
    }
    return -1;
}
16, Work:Code/network/amiga.org>gcc working.c -o working -noixemul -Inetinclude: -lsocket
working.c: In function `main':
working.c:36: warning: assignment makes pointer from integer without a cast
Nates shell.
16, Work:Code/network/amiga.org>
/* Excerpt of the receive loop: read until recv() reports end of stream,
   printing each piece as it arrives. */
for (;;)
{
/* At most 1023 bytes are requested, leaving room for the terminator
   written below. */
bytes_received = recv ( socket_handle , input_buffer , 1023, 0 ) ;
if ( bytes_received == -1 )
{
printf ( "An error occured during the receive procedure \n" ) ;
return bailout() ; // return 0; to return bailout();
}
/* 0 means the peer closed the connection: nothing more to read. */
if ( bytes_received == 0 )
break ;
/* NUL-terminate the piece just received so it can be printed with %s;
   output stops early if the data itself contains a NUL byte. */
pinput_buffer = input_buffer + bytes_received ;
*pinput_buffer = 0 ;
printf ( "%s" , input_buffer ) ;
}
It doesn't handle the response arriving in multiple parts correctly.
@kvasir: .... It doesn't handle the response arriving in multiple parts correctly.
Another thing to look for is the inadvertent adding of ; after blocks. It's a bad habit you should learn away from.
Trying to get HTTP GET right with custom code is probably one of the most difficult tasks. Unless it is for exercise, you really should use libcurl.