This contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

292 lines
6.7 KiB

  1. /* Notes */ /*{{{C}}}*//*{{{*/
  2. /*
  3. This program is GNU software, copyright 1997, 1998, 1999, 2000, 2001
  4. Michael Haardt <michael@moria.de>.
  5. This program is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by the
  7. Free Software Foundation; either version 2 of the License, or (at your
  8. option) any later version.
  9. This program is distributed in the hope that it will be useful, but
  10. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  12. for more details.
  13. You should have received a copy of the GNU General Public License along
  14. with this program. If not, write to the Free Software Foundation, Inc.,
  15. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  16. */
  17. /*}}}*/
  18. /* #includes */ /*{{{*/
  19. #undef _POSIX_SOURCE
  20. #define _POSIX_SOURCE 1
  21. #undef _POSIX_C_SOURCE
  22. #define _POSIX_C_SOURCE 2
  23. #include "config.h"
  24. #include <sys/types.h>
  25. #include <assert.h>
  26. #include <ctype.h>
  27. #include <errno.h>
  28. #include <limits.h>
  29. #include <locale.h>
  30. #ifdef HAVE_GETTEXT
  31. #include <libintl.h>
  32. #define _(String) gettext(String)
  33. #else
  34. #define _(String) String
  35. #endif
  36. #include <regex.h>
  37. #include <stdio.h>
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include "misc.h"
  41. #include "sentence.h"
  42. /*}}}*/
  /*
   * German abbreviations, stored without their trailing period.  sentence()
   * selects this table when lang begins with "de".  The list ends with a
   * null pointer.
   */
  static const char *abbreviations_de[]= /*{{{*/
  {
    "Dr", "bzw", "etc", "sog", "usw",
    NULL /* end of table */
  };
  /*}}}*/
  /*
   * English abbreviations, stored without their trailing period.  sentence()
   * selects this table when lang begins with "en" or "C", and
   * endingInPossesiveS() only reports a match while this table is active.
   * The "<digit> in" entries match a digit followed by " in" (presumably
   * inches).  The list ends with a null pointer.
   */
  static const char *abbreviations_en[]= /*{{{*/
  {
    "ch", "Ch", "ckts", "dB",
    "Dept", "dept", "Depts", "depts",
    "Dr", "Drs",
    "Eq", "eq",
    "etc", "et al",
    "Fig", "fig", "Figs", "figs",
    "ft",
    "0 in", "1 in", "2 in", "3 in", "4 in",
    "5 in", "6 in", "7 in", "8 in", "9 in",
    "Inc", "Jr", "jr", "mi",
    "Mr", "Mrs", "Ms",
    "No", "no", "Nos", "nos",
    "Ph",
    "Ref", "ref", "Refs", "refs",
    "St", "vs", "yr",
    NULL /* end of table */
  };
  /*}}}*/
  /*
   * Empty abbreviation table: only the terminating null pointer.  sentence()
   * falls back to it for every language it has no table for.
   */
  static const char *abbreviations_none[]= /*{{{*/
  { NULL };
  /*}}}*/
  /*
   * Abbreviation table currently in effect.  sentence() points it at one of
   * the abbreviations_* tables according to its lang argument; until then it
   * is a null pointer.
   */
  static const char **abbreviations=NULL;
  112. static int endingInAbbrev(const char *s, size_t length, const char *lang) /*{{{*/
  113. {
  114. const char **abbrev=abbreviations;
  115. size_t aLength;
  116. if (!isalpha(s[length-1])) return 0;
  117. if (endingInPossesiveS(s,length)) return 0;
  118. while (*abbrev!=(const char*)0)
  119. {
  120. if ((aLength=strlen(*abbrev))<length)
  121. {
  122. if (!isalpha(s[length-2])) return 1;
  123. if (!isalpha(s[length-aLength-1]) && strncmp(s+length-aLength,*abbrev,aLength)==0) return 1;
  124. }
  125. else
  126. {
  127. if (length==1) return 1;
  128. if (aLength==length && strncmp(s,*abbrev,aLength)==0) return 1;
  129. }
  130. ++abbrev;
  131. }
  132. return 0;
  133. }
  134. /*}}}*/
  135. int endingInPossesiveS(const char *s, size_t length) /*{{{*/
  136. {
  137. return (abbreviations==abbreviations_en && length>=3 && strncmp(s+length-2,"\'s",2)==0);
  138. }
  139. /*}}}*/
  140. void sentence(const char *cmd, FILE *in, const char *file, void (*process)(const char *, size_t, const char *, int), const char *lang) /*{{{*/
  141. {
  142. /* variables */ /*{{{*/
  143. int voc,oc,c;
  144. char *sent=malloc(128);
  145. size_t length=0,capacity=128;
  146. int inWhiteSpace=0;
  147. int inParagraph=0;
  148. int ellipsis=0;
  149. int line=1,beginLine=1;
  150. int err;
  151. regex_t hashLine;
  152. char filebuf[_POSIX_PATH_MAX+1];
  153. /*}}}*/
  154. if (strncmp(lang,"en",2)==0) abbreviations=abbreviations_en;
  155. else if (strncmp(lang,"C",1)==0) abbreviations=abbreviations_en;
  156. else if (strncmp(lang,"de",2)==0) abbreviations=abbreviations_de;
  157. else abbreviations=abbreviations_none;
  158. /* compile #line number "file" regular expression */ /*{{{*/
  159. if ((err=regcomp(&hashLine,"^[ \t]*line[ \t]*\\([0-9][0-9]*\\)[ \t]*\"\\([^\"]*\\)\"",0)))
  160. {
  161. char buf[256];
  162. size_t len=regerror(err,&hashLine,buf,sizeof(buf)-1);
  163. buf[len]='\0';
  164. fprintf(stderr,_("%s: internal error, compiling a regular expression failed (%s).\n"),cmd,buf);
  165. exit(2);
  166. }
  167. /*}}}*/
  168. voc='\n';
  169. c=getc(in);
  170. while ((oc=c)!=EOF)
  171. {
  172. c=getc(in);
  173. if (oc=='\n') ++line;
  174. if (voc=='\n' && oc=='#') /* process cpp style #line, continue */ /*{{{*/
  175. {
  176. char buf[_POSIX_PATH_MAX+20];
  177. regmatch_t found[3];
  178. buf[0]=c; buf[1]='\0';
  179. (void)fgets(buf+1,sizeof(buf)-1,in);
  180. if (regexec(&hashLine,buf,3,found,0)==0) /* #line */ /*{{{*/
  181. {
  182. size_t len;
  183. line=strtol(buf+found[1].rm_so,(char**)0,10)-1;
  184. len=found[2].rm_eo-found[2].rm_so;
  185. if (len>_POSIX_PATH_MAX) len=_POSIX_PATH_MAX;
  186. strncpy(filebuf,buf+found[2].rm_so,len);
  187. filebuf[len]='\0';
  188. file=filebuf;
  189. }
  190. /*}}}*/
  191. c='\n';
  192. continue;
  193. }
  194. /*}}}*/
  195. if (length)
  196. {
  197. if (length>=(capacity-1) && (sent=realloc(sent,capacity*=2))==(char*)0)
  198. {
  199. fprintf(stderr,_("%s: increasing sentence buffer failed: %s\n"),cmd,strerror(errno));
  200. exit(2);
  201. }
  202. if (isspace(oc))
  203. {
  204. if (!inWhiteSpace)
  205. {
  206. sent[length++]=' ';
  207. inWhiteSpace=1;
  208. }
  209. }
  210. else
  211. {
  212. sent[length++]=oc;
  213. if
  214. (
  215. (length==3 && strncmp(sent+length-3,"...",3)==0 && (c==EOF || isspace(c)))
  216. || (length>=4 && strncmp(sent+length-4," ...",4)==0 && (c==EOF || isspace(c)))
  217. )
  218. {
  219. /* omission ellipsis */
  220. inWhiteSpace=0;
  221. }
  222. else if (length>=4 && !isspace(sent[length-4]) && strncmp(sent+length-3,"...",3)==0 && (c==EOF || isspace(c)))
  223. {
  224. /* beginning ellipsis */
  225. char foo;
  226. foo=sent[length-4];
  227. sent[length-4]='\0';
  228. process(sent,length-4,file,beginLine);
  229. sent[length-4]=foo;
  230. memmove(sent,sent-4,4);
  231. length=4;
  232. inParagraph=0;
  233. inWhiteSpace=0;
  234. beginLine=line;
  235. }
  236. else if (length>=4 && strncmp(sent+length-4,"...",3)==0 && (c==EOF || isspace(c)))
  237. {
  238. /* ending ellipsis */
  239. if (inWhiteSpace) --length;
  240. sent[length]='\0';
  241. process(sent,length,file,beginLine);
  242. length=0;
  243. }
  244. else if ((oc=='.' || oc==':' || oc=='!' || oc=='?') && (c==EOF || isspace(c)) && (oc!='.' || !endingInAbbrev(sent,length,lang)))
  245. {
  246. /* end of sentence */
  247. if (inWhiteSpace) --length;
  248. sent[length]='\0';
  249. process(sent,length,file,beginLine);
  250. length=0;
  251. }
  252. else
  253. {
  254. /* just a regular character */
  255. inWhiteSpace=0;
  256. }
  257. }
  258. }
  259. else if (isupper(oc))
  260. {
  261. inParagraph=0;
  262. sent[length++]=oc;
  263. inWhiteSpace=0;
  264. beginLine=line;
  265. }
  266. else if (!inParagraph && oc=='\n' && c==oc)
  267. {
  268. process("",0,file,line);
  269. inParagraph=1;
  270. }
  271. voc=oc;
  272. }
  273. if (!inParagraph) process("",0,file,line);
  274. regfree(&hashLine);
  275. }
  276. /*}}}*/