This probably won't compile without a few extra headers (which ones depends on your OS). The program assumes that the list of files to open is in a file called in.dat, that the output goes into a file called out.dat, and that you are working out of a directory called backup (the code uses c:/backup). Additionally, WinZip writes a _ instead of a : in filenames, so this program performs the same replacement when it looks for files.
You can download the backup zip file for the site
here
// deadlinks.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
// Removes every occurrence of the character `a` from the NUL-terminated
// string `str`. The string is compacted in place: the kept characters stay
// in their original order and the result is never longer than the input.
//
// str: writable, NUL-terminated buffer.
// a:   character to strip out.
void str_remove(char *str, char a)
{
    // `src` walks the original text while `dst` trails behind and marks where
    // the next kept character goes. Because `src` stops at the terminator,
    // nothing beyond the end of the string is ever read, and runs of
    // consecutive matches are skipped completely.
    char *dst = str;
    for (const char *src = str; *src; ++src)
    {
        if (*src != a)
            *dst++ = *src;
    }
    *dst = 0;
}
// Substitutes `b` for every occurrence of `a` in the NUL-terminated string
// `str`. The edit happens in place and the length of the string is unchanged.
// This file uses it to swap '_' and ':' between page titles and file names.
void str_replace(char *str, char a, char b)
{
    for (char *cursor = str; *cursor != 0; ++cursor)
    {
        if (*cursor == a)
        {
            *cursor = b;
        }
    }
}
// One node of a singly linked list of page titles. wiki_link keeps such a
// list to remember which pages refer to a given link.
struct ref_page {
    // Creates an unlinked node with an empty title.
    ref_page() { next=NULL; page_title[0]=0; }

    // Creates an unlinked node from a page's file name.
    // title: NUL-terminated file name; every '_' in it is turned back into
    //        ':' so the stored value is the page title rather than the
    //        on-disk name.
    ref_page(const char *title)
    {
        next=NULL;
        // Bounded copy: a name longer than the buffer is truncated instead of
        // overrunning page_title. strncpy does not terminate on truncation,
        // so the terminator is written explicitly.
        strncpy(page_title,title,sizeof(page_title)-1);
        page_title[sizeof(page_title)-1]=0;
        str_replace(page_title,'_',':');
    }

    char page_title[1024]; // page title, with ':' where the file name has '_'
    ref_page *next;        // next node in the list, or NULL at the tail
};
// Verbosity switch: a non-zero value enables the progress messages that are
// printed to stdout behind the `if (screen_output)` checks.
const int screen_output = 1;
// A link target discovered while scanning pages, together with the list of
// pages that refer to it. Instances are chained through `next` into a singly
// linked list.
struct wiki_link
{
    // Builds the entry for a link seen for the first time.
    // name: link text; any '~' characters are dropped before it is stored.
    // page: file name of the page containing the link; it becomes the first
    //       (and so far only) referring page.
    // The constructor also probes c:/backup/<entry_name> (with ':' mapped to
    // '_') and records in `exists` whether that file could be opened.
    wiki_link(const char *name, const char *page)
    {
        // Bounded copy: an over-long name is truncated instead of overrunning
        // entry_name. With entry_name capped at 1023 characters the sprintf
        // into the 2048-byte filename buffer below cannot overflow either.
        strncpy(entry_name,name,sizeof(entry_name)-1);
        entry_name[sizeof(entry_name)-1]=0;
        str_remove(entry_name,'~');
        referring_page = new ref_page(page);
        char filename[2048];
        sprintf(filename,"c:/backup/%s",entry_name);
        if (screen_output) printf(" ** looking for file '%s'...",filename);
        // replace : with _ for filenames; the first two characters ("c:") are
        // skipped so the drive-letter colon is left alone
        str_replace(filename+2,':','_');
        FILE *check = fopen(filename,"r");
        if(check!=NULL)
        {
            exists=true;
            fclose(check);
            if (screen_output) printf(" found\n");
        }
        else
        {
            exists=false;
            if (screen_output) printf(" not found\n");
        }
        next = NULL;
        refcount=1;
    }
    int refcount;             // number of distinct pages referring to this link
    char entry_name[1024];    // link name with '~' removed
    bool exists;              // true if the target file could be opened
    ref_page *referring_page; // head of the list of referring pages
    wiki_link *next;          // next link in the list, or NULL
    void addReferringPage(const char *filename);
};
void wiki_link::addReferringPage(const char *filename)
{
ref_page *tmp = referring_page;
ref_page check_ref(filename);
while(tmp)
{
if(!strcmp(check_ref.page_title,tmp->page_title))
return; // already exists
tmp=tmp->next;
}
// does not exist
tmp = new ref_page(filename);
tmp->next=referring_page;
referring_page=tmp;
refcount++;
}
// Head of the global singly linked list of all links collected so far;
// NULL while the list is empty.
wiki_link* gHead = NULL;
// Looks up a link by name in the global list that starts at gHead.
// link_name: NUL-terminated name, compared exactly (strcmp) against each
//            entry's entry_name.
// Returns the first matching entry, or NULL when the list is empty or holds
// no entry with that name.
wiki_link *gFindReference(const char *link_name)
{
    for (wiki_link *node = gHead; node != NULL; node = node->next)
    {
        if (strcmp(link_name, node->entry_name) == 0)
        {
            return node;
        }
    }
    return NULL;
}
void gAddReference(char *link_name, const char *filename)
{
if(link_name[0]==0)
return;
// modify link_name so that it does not include any '
str_remove(link_name,'\');
wiki_link *cur=gFindReference(link_name);
if (!cur)
{
cur=new wiki_link(link_name,filename);
cur->next=gHead;
gHead=cur;
}
else
cur->addReferringPage(filename);
}
int _tmain(int argc, _TCHAR* argv[])
{
FILE *p = fopen("c:/backup/in.dat","r");
FILE *q = fopen("c:/backup/out.dat","w");
FILE *s = fopen("c:/backup/deadend.dat","w");
char page_title[1024];
while(fgets(page_title,1024,p))
{
page_title[strlen(page_title)-1]=0;
if (screen_output) printf("Reading [%s]...n",page_title);
char filename[2048];
sprintf(filename,"c:/backup/%s",page_title);
char link_name[1024]="";
int a;
int curchar=0;
bool reading=false;
FILE *r=fopen(filename,"r");
if(!r)
{
if (screen_output) printf("error: could not open %sn",filename);
}
else
{
bool foundlink = false;
while((a=fgetc(r))!=-1)
{
//printf("%c",a);
if(a=='[')
{
reading=true;
foundlink=true;
}
else if (reading==true)
{
if (a=='[')
{
printf("Error in file %s (link %s)",page_title,link_name);
exit(0);
}
else if (a==']' || a=='|' || a=='#')
{
gAddReference(link_name,page_title);
reading=false;
curchar=0;
}
else
{
link_name[curchar]=a;
link_name[curchar+1]=0;
curchar++;
if(curchar==1023)
{
printf("link %s in file %s too long!",link_name,page_title);
exit(0);
}
}
}
}
if(!foundlink)
fprintf(s,"[%s]n",page_title);
fclose(r);
}
}
// output phase
wiki_link *cur = gHead;
FILE *t = fopen("c:/backup/missing_ranked.dat","w");
printf("Output missing links.n");
int brokenCount=0;
while(cur)
{
if(!cur->exists && !strchr(cur->entry_name,'@') && !strstr(cur->entry_name,"http:"))
{
fprintf(q,"* [%s]: ",cur->entry_name);
fprintf(t,"* %02d [%s]: ",cur->refcount,cur->entry_name);
ref_page *page=cur->referring_page;
while(page)
{
fprintf(q,"[%s]. ",page->page_title);
fprintf(t,"[%s]. ",page->page_title);
page=page->next;
}
fprintf(q,"n");
fprintf(t,"n");
brokenCount++;
}
cur=cur->next;
}
fprintf(q,"''There are currently %d Broken Links listed!''n",brokenCount);
for(int c='A';c<='Z';c++)
fprintf(q,"* [%c !!n",c);
fclose(q);
q=fopen("c:/backup/everything.dat","w");
cur = gHead;
printf("Indexing everything.n");
while(cur)
{
// if(!cur->exists)
{
fprintf(q,"* (%03d references) [%s]: ",cur->refcount,cur->entry_name);
ref_page *page=cur->referring_page;
while(page)
{
fprintf(q,"[%s]. ",page->page_title);
page=page->next;
}
fprintf(q,"n");
}
cur=cur->next;
}
// recheck in.dat for orphaned pages
fclose(p);
fclose(q);
p = fopen("c:/backup/in.dat","r");
q = fopen("c:/backup/orphan.dat","w");
printf("Checking for orphans.n");
while(fgets(page_title,1024,p))
{
page_title[strlen(page_title)-1]=0;
ref_page check_ref(page_title);
cur = gHead;
bool found=false;
while(cur && !found)
{
if(!strcmp(check_ref.page_title,cur->entry_name))
found=true;
cur=cur->next;
}
if(!found)
fprintf(q,"[%s]n",check_ref.page_title);
}
fcloseall();
return 0;
}
Comments (0)
You don't have permission to comment on this page.