CS:APP Proxy Lab
阅读 csapp 3e 第十、十一与十二章。完成 Proxy 实验,编写 proxy.c 代码,完成一个简易的代理服务器。
代码见此。
终端 make 之后,通过下面命令调用评分程序,
./driver.sh
多线程实现
每 accept 一个连接请求就分配一个线程(detach),在do_it函数中完成代理转发的工作。
int main(int argc, char **argv)
{
int listenfd, *connfdp;
socklen_t clientlen;
struct sockaddr_storage clinetaddr;
pthread_t tid;
if (argc != 2)
{
printf("Usage: %s <port_number>\n", argv[0]);
exit(0);
}
init_cache();
// argv[1] is listen port
listenfd = Open_listenfd(argv[1]);
clientlen = sizeof(struct sockaddr_storage);
while (1)
{
connfdp = Malloc(sizeof(int));
*connfdp = Accept(listenfd, (SA *)&clinetaddr, &clientlen);
Pthread_create(&tid, NULL, thread, connfdp);
}
destroy_cache();
return 0;
}
void *thread(void *vargp)
{
Pthread_detach(pthread_self());
int connfd = *((int *)vargp);
Free(vargp);
// do work
do_it(connfd);
Close(connfd);
return NULL;
}
“代理”就是把客户端的请求进行一定的修改(修改请求头的相关字段)后,转发给服务器,再从服务器那里接收请求并返回给客户端。
完成这个lab 时,我的 proxy 代码并没有完全转发客户端请求,除了基本的请求头特殊处理以外,请求头部分字段由 proxy 自行填充(舍弃了客户端其他请求头的)。为了获取 response body 的长度,要额外读取 Content-length 字段。
void do_it(int fd)
{
char buf[MAXLINE], method[MAXLINE], oldUrl[MAXLINE], version[MAXLINE];
char hostName[MAXLINE], port[MAXLINE], newUrl[MAXLINE];
char req[MAXLINE], cacheBuf[MAXLINE];
int fd2server;
rio_t rio, rio2server;
/* client -> proxy starts */
Rio_readinitb(&rio, fd);
Rio_readlineb(&rio, buf, MAXLINE);
sscanf(buf, "%s %s %s", method, oldUrl, version);
if (strcasecmp(method, "GET"))
{
proxyerror(fd, method, "501", "Not Implemented",
"Proxy does not implement this method");
return;
}
// store original method
strcpy(req, buf);
int http_phead_len = strlen(http_phead);
int content_len = 0;
char *colon = strstr(oldUrl + http_phead_len, ":");
char *forwardSlash = strstr(oldUrl + http_phead_len, "/");
extract_hostname(hostName, oldUrl + http_phead_len);
extract_portnum(port, colon);
extract_newUri(newUrl, forwardSlash);
// browser sends any additional req heads
Rio_readlineb(&rio, buf, MAXLINE);
while (strcmp(buf, "\r\n"))
{ // read until \r\n
// actually ignore
// Rio_writen(fd2server, buf, strlen(buf));
Rio_readlineb(&rio, buf, MAXLINE);
}
/* client -> proxy ends */
/* client <- proxy try to use cache*/
int hashVal = getHash(req);
if (use_cache(hashVal, req, fd))
{
return;
}
/* client <- proxy ends */
/* proxy -> server starts */
fd2server = Open_clientfd(hostName, port);
Rio_readinitb(&rio2server, fd2server);
sprintf(buf, "%s %s %s\r\n", method, newUrl, "HTTP/1.0");
Rio_writen(fd2server, buf, strlen(buf));
sprintf(buf, "Host: %s\r\n", hostName);
Rio_writen(fd2server, buf, strlen(buf));
sprintf(buf, "%s\r\n", user_agent_hdr);
Rio_writen(fd2server, buf, strlen(buf));
sprintf(buf, "%s\r\n", connection_hdr);
Rio_writen(fd2server, buf, strlen(buf));
sprintf(buf, "%s\r\n", proxy_conn_hdr);
Rio_writen(fd2server, buf, strlen(buf));
sprintf(buf, "\r\n");
Rio_writen(fd2server, buf, strlen(buf));
/* proxy -> server ends */
// client <- proxy <- server Updates Cache
int posOfRespHead = 0;
Rio_readlineb(&rio2server, buf, MAXLINE);
posOfRespHead = write2buf(cacheBuf, posOfRespHead, buf);
while (strcmp(buf, "\r\n"))
{
if (strncmp(buf, contentlen_head, strlen(contentlen_head)) == 0)
{
sscanf(buf, "Content-length: %d", &content_len);
}
Rio_writen(fd, buf, strlen(buf));
Rio_readlineb(&rio2server, buf, MAXLINE);
posOfRespHead = write2buf(cacheBuf, posOfRespHead, buf);
}
Rio_writen(fd, buf, strlen(buf)); // \r\n
// reponse content
Rio_readnb(&rio2server, buf, content_len);
Rio_writen(fd, buf, content_len);
insert_cache(hashVal, req, strlen(req) + 1, cacheBuf, posOfRespHead + 1, buf, content_len);
close(fd2server);
}
并发读写缓存
用实现双链表加哈希,实现一个LRU的缓存。针对 Hash Collision 采取的策略是比较被哈希的”请求”字符串,即 Cache_Element 的 req 字段,如果冲突则覆盖(没有用线性探测、平方探测、重哈希或生成链表的方法)。
#define MAX_ELEMENTS 10
#define HEAD 0
#define TAIL 11
struct Cache_Element
{
u_int16_t prev, next;
char *req;
char *respHead;
char *respBody;
int respBodySz;
};
struct
{
struct Cache_Element ele[MAX_ELEMENTS + 2];
int cached_cnt;
sem_t mutex;
} Cache;
每次读写命中缓存则要更新缓存单元在链表中的未知,无命中则添加新缓存。由于用全局变量 Cache 代表,所以必须要用 PV 原语进行临界区读写保护。
// try to use the cached data at hashVal position
// if cache hit, write cached data into fd
int use_cache(int hashVal, char* key, int fd)
{
int ret = 0;
P(&Cache.mutex);
if (Cache.ele[hashVal].req != NULL &&
strcmp(Cache.ele[hashVal].req, key) == 0)
{
// use cache successfully
ret = 1;
toBeleast(hashVal);
// send data to client
char* cached = Cache.ele[hashVal].respHead;
char* end = cached;
while((end = strstr(cached, "\r\n")) != NULL) {
int wrBytes = end - cached;
wrBytes += 2;
Rio_writen(fd, cached, wrBytes);
cached = end + 2;
}
Rio_writen(fd, Cache.ele[hashVal].respBody, Cache.ele[hashVal].respBodySz);
}
V(&Cache.mutex);
return ret;
}
void insert_cache(int hashVal, char* req, int reqLen, char* respHead, int headLen, char* respBody, int bodySz) {
if(headLen <= 0 || bodySz > MAX_OBJECT_SIZE) {
return;
}
P(&Cache.mutex);
if(Cache.ele[hashVal].req != NULL || Cache.cached_cnt == MAX_ELEMENTS) { // overwrite cached elements
free(Cache.ele[hashVal].req);
Cache.ele[hashVal].req = malloc(reqLen);
memcpy(Cache.ele[hashVal].req, req, reqLen);
free(Cache.ele[hashVal].respHead);
Cache.ele[hashVal].respHead = malloc(headLen);
memcpy(Cache.ele[hashVal].respHead, respHead, headLen);
free(Cache.ele[hashVal].respBody);
Cache.ele[hashVal].respBody = malloc(bodySz);
memcpy(Cache.ele[hashVal].respBody, respBody, bodySz);
Cache.ele[hashVal].respBodySz = bodySz;
toBeleast(hashVal);
} else { // add new elements
Cache.ele[hashVal].req = malloc(reqLen);
memcpy(Cache.ele[hashVal].req, req, reqLen);
Cache.ele[hashVal].respHead = malloc(headLen);
memcpy(Cache.ele[hashVal].respHead, respHead, headLen);
Cache.ele[hashVal].respBody = malloc(bodySz);
memcpy(Cache.ele[hashVal].respBody, respBody, bodySz);
Cache.ele[hashVal].respBodySz = bodySz;
// add to head
Cache.ele[hashVal].next = Cache.ele[HEAD].next;
Cache.ele[hashVal].prev = HEAD;
Cache.ele[HEAD].next = hashVal;
Cache.ele[Cache.ele[hashVal].next].prev = hashVal;
++Cache.cached_cnt;
}
V(&Cache.mutex);
}
需要注意的是,用二进制数据读写函数接口操作字符串数据时,需要注意 \0 所占的空间,这在操作字符串数据时是尤其需要注意的。每次覆盖缓存时,我选择 free 再 malloc ,但其实可以固定分配空间大小,直接在空间上写覆盖即可。
Conclusions
最后三章相当于把书本前面的内容串起来,进行应用上的总结,即如何完成程序间通信(本地的或远程的)。系统级 I/O 中文件的概念与 Descriptor/OpenFile/V-node table,网络编程中的套接字接口,并发编程三种模型以及同步问题(生产者消费者与读写问题等),线程安全函数与可重入函数的概念,这些都可以进一步钻研(尤其是并发的同步问题)。