Doxygen XLinks
by
V: 2511R0
Website: doxygen
Loading...
Searching...
No Matches
htmlreplacer.cpp
1//==================================================================================================
2// This implementation file is part of DoxygenXLinks - a Doxygen post-processor that allows
3// defining smarter <b>Doxygen</b>-links.
4//
5// \emoji :copyright: 2025-2026 A-Worx GmbH, Germany.
6// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8#include "jobs.hpp"
9#include "dxl.hpp"
10#include "xlink.hpp"
11#include "dxlapp.hpp"
12#include "ALib.ALox.H"
13#include "ALib.App.H" // TODO(251204 09:14): we need this only for the definition of LOX_LOX.
14 // How can we avoid including the whole app?
15
16#include <iostream>
17#include <fstream>
18
19using namespace alib;
20using namespace std;
21
22namespace dxl {
23
25 Lox_SetDomain("DXL/HTML/JOB", Scope::Method )
26 Lox_Info( "Reading HTML file {!Q} of size {}", htmlFileNode.Name(),
28 dxl.Stats.HTMLFileSize.fetch_add(int(htmlFileNode->Size()));
29
30 Path path;
31 {ALIB_LOCK_SHARED_WITH(dxl.GetHTMLTreeLock())
32 htmlFileNode.AssembleRealPath(path, lang::Inclusion::Include);
33 }
34
35 // read exclamations applicable to this file once
37 dxl.Exclamations.Get(htmlFileNode.Name(), exclamations);
38
40
41 Lox_Info("Reading HTML file: {}", path )
42 MappedFile& htmlFile= poolWorker->InputFile;
43 std::errc errc= htmlFile.Open(path.Terminate(), htmlFileNode->Size(), false);
44 if(errc != std::errc()) {
45 app.cErr->Add(app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
46 path);
47 app.machine.SetExitCode(ExitCodes::CantOpenHMLFile);
48 return true;
49 }
50 MappedFile::Data<char> mfc= htmlFile.GetData<char>();
51
52 // output buffer (fileSize * 3). And: we add some padding bytes, to be able to test
53 // backward contents without checking actual write-size.
54 AString& writeBuffer= poolWorker->WriteBuffer;
55 writeBuffer.EnsureRemainingCapacity(integer(htmlFileNode->Size() * 3));
56 constexpr integer writeBufferPadSize= 20;
57 writeBuffer._(Fill(0, writeBufferPadSize));
58 char* writeBufferStart= writeBuffer.VBuffer();
59 char* wb = writeBufferStart + writeBufferPadSize;
60
61 // if the application has an exit code, we stop right now
62 // todo: this periodic check makes sense, but it has not been tested yet.
63 // It has to move into the loop below and also be added to the other jobs.
64 if ( app.machine.GetExitCode().Integral() )
65 return true;
66
67 // we want maximum speed and even spare the fast log calls in the loop.
68 Verbosity verbosity;
69 Lox_GetVerbosity(verbosity)
70
71 // loop over all lines of the HTML-file
72 bool fileChanged = false;
73 int cntELReplacements = 0;
74 int cntELRefReplacements= 0;
75 int lineNo = 1;
76 String512 linkString;
77 integer lineStartRemaining = mfc.Remaining();
78 while (!mfc.IsEOF()) {
79 char c= mfc.Next<NC>();
80
81 // linefeed
82 if (c == '\n') { *wb++= '\n'; lineNo++; lineStartRemaining= mfc.Remaining(); continue; }
83
84 // check for EL-and ELREF-anchors with every '"'
85 if (c == '"') {
86 bool isElAnchor = characters::Equal<char>(wb-12, "<a class=\"el" , 12);
87 bool isElRefAnchor= characters::Equal<char>(wb-15, "<a class=\"elRef", 15);
88 if ( isElAnchor || isElRefAnchor ) {
89 // sometimes Doxygen adds two spaces
90 while (mfc.Remaining() && *mfc ==' ')
91 mfc.Next<NC>();
92
93 // read 'href="x"'
94 if (mfc.Remaining() < 8) { *wb++= '\"'; *wb++= ' '; continue; }
95 #if ALIB_DEBUG
96 ALIB_ASSERT( mfc()=='h'
97 && mfc()=='r'
98 && mfc()=='e'
99 && mfc()=='f'
100 && mfc()=='='
101 && mfc()=='\"', "DXL/HTML/JOB")
102 #else
103 mfc.Skip(6);
104 #endif
105
106 // read file-name and anchor
107 String256 fileName;
108 String128 anchor;
109 bool isAnchor= false;
110 while (mfc.Remaining() && (c= mfc()) != '\"') {
111 if (c=='#') {isAnchor= true; continue;}
112 if (!isAnchor) fileName._(c);
113 else anchor ._(c);
114 }
115 Styles styles;
116 dxl.GetELDecoration( styles, isElRefAnchor, htmlFileNode, fileName, anchor,
117 lineNo, int(lineStartRemaining - mfc.Remaining() - 9) );
118 ALIB_ASSERT_ERROR(styles.Size(), "DXL/HTML/JOB", "No styles given for EL-Anchor" )
119 // re-activate AString
120 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(),
121 "DXL/HTML/JOB", "Write buffer overflow detected" )
122 writeBuffer.SetLength(wb - writeBufferStart);
123
124 for ( int i= 1; i < styles.Size(); ++i ) // starting with 1 omits el/elRef
125 writeBuffer._<NC>( ' ' )._<NC>( styles.Get(i) );
126 writeBuffer._<NC>("\" href=\"" );
127 if ( fileName.IsNotEmpty() ) writeBuffer._<NC>( fileName );
128 if ( anchor.IsNotEmpty() ) writeBuffer._<NC>( '#' )._<NC>( anchor );
129 writeBuffer._<NC>( '\"' );
130 wb= writeBuffer.VBuffer() + writeBuffer.Length();
131 fileChanged= true;
132 if (isElAnchor) ++cntELReplacements;
133 else ++cntELRefReplacements;
134 }
135 else
136 *wb++= '\"';
137 continue;
138 }
139
140 // not '#'? continue
141 if (c != '#') { *wb++= c; continue; }
142
143
144 //---- 2nd character ----
145 if (mfc.Remaining()<3) {
146 *wb++= c;
147 while (mfc.Remaining()){
148 c= mfc.Next<NC>();
149 if ( c == '\n')
150 ++lineNo;
151 *wb++= c;
152 }
153 break;
154 }
155
156 c= mfc.Next<NC>();
157
158 // if double hash is given, remove one hash ##"..." and keep the rest.
159 // Note: This is mainly needed for the documentation of this tool ;-)
160 if (c == '#' ) { *wb++= '#'; continue;}
161
162 // not '"' ?
163 if ( c != '\"' ) {
164 *wb++= '#';
165 *wb++= c;
166 if ( c == '\n')
167 ++lineNo;
168 continue;
169 }
170
171 //---- 3rd character: not an allowed link start? ----
172 c= mfc.Next<NC>();
173 if ( !isalpha(c) && String(".%^_&").IndexOf(c) < 0 ) {
174 *wb++= '#';
175 *wb++= '\"';
176 *wb++= c;
177 if ( c == '\n')
178 ++lineNo;
179 continue;
180 }
181
182 int colNo= int(lineStartRemaining - mfc.Remaining() - 2);
183
184 // search for exclamations
185 { auto exclIt= exclamations.begin();
186 for (; exclIt!=exclamations.end(); ++exclIt )
187 if ( (*exclIt)->Matches(lineNo, colNo ) )
188 break;
189 if (exclIt != exclamations.end()) {
190 *wb++= '#';
191 *wb++= '\"';
192 *wb++= c;
193 continue;
194 } }
195
196 // This seems to be an XLink!
197 bool suppressedAnchor;
198 linkString.Reset(c); {
199 bool foundEnd= false;
200 while (mfc.Remaining()) {
201 c= mfc.Next<NC>();
202 if ( c == '\\') { linkString._<NC>(c); linkString._<NC>(mfc.Next()); continue; }
203 if ( c == '\"') { foundEnd= true; break;}
204 if ( c == '\n') { lineNo++; break; }
205 linkString._<NC>(c);
206 if (linkString.Length() == 511 ) {
207 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
208 linkString, path, lineNo, colNo )
209 break;
210 } }
211
212 suppressedAnchor= linkString.CharAtStart() == '%';
213
214 // end not found, or the "&" (which was allowed to pass as a link start above!) does not begin the "&lt;" entity
215 bool illegalHTMLEntity= ( linkString.CharAt(suppressedAnchor ? 1 : 0) == '&'
216 && ( ( suppressedAnchor && !linkString.StartsWith("%&lt;"))
217 || (!suppressedAnchor && !linkString.StartsWith( "&lt;")) ) ) ;
218 if ( !foundEnd || illegalHTMLEntity ) {
219 *wb++= '#';
220 *wb++= '\"';
221 for ( auto lsC : linkString )
222 *wb++= lsC;
223 if ( !illegalHTMLEntity) {
224 *wb++= '\n';
225 Lox_Warning( "Found unterminated XLink pattern {!Q} in HTML file {}:{}:{}",
226 linkString, path, lineNo -1, colNo )
227 }
228 continue;
229 }
230
231 if (suppressedAnchor)
232 linkString[0]= ' ';
233 linkString.Trim();
234 }
235
236 ConvertHTMLEntitiesToAscii(linkString);
237 if (verbosity >= Verbosity::Info)
238 Lox_Info( "Found XLink pattern {!Q} in HTML file {}:{}:{}",
239 linkString, path, lineNo, colNo )
240
241 XLink* link= dxl.GetXLink(linkString, htmlFileNode);
242 {ALIB_LOCK_WITH(link->Lock)
243 link->HTMLLocations.push_back({htmlFileNode, lineNo, colNo});
244 }
245
246 // re-activate AString
247 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
248 "Write buffer overflow detected" )
249 writeBuffer.SetLength(wb - writeBufferStart);
250
251 // not resolved?
252 if ( !link->IsResolved() ) {
253 // paste the original XLink to the output
254 writeBuffer._<NC>( "#")._<NC>( "\"");
255 if ( suppressedAnchor )
256 writeBuffer._<NC>( "%");
257 writeBuffer._<NC>(linkString)._<NC>( "\"");
258 } else {
259 fileChanged= true;
260
261 // write replacement
262 auto& css= link->CSSClasses;
263 ALIB_ASSERT_ERROR(css.Size(), "DXL/HTML/JOB", "No styles given for XLink {}", linkString )
264
265 //-------------------------------------- write anchor ------------------------------------
266 if (!suppressedAnchor) {
267 writeBuffer._<NC>( "<a class=\"" );
268 for ( int i= 0; i < css.Size(); ++i )
269 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
270 writeBuffer.DeleteEnd(1);
271 writeBuffer._<NC>( "\" href=\"" )
272 ._<NC>( link->Result().HTMLBaseURL )
273 ._<NC>( link->Result().HTMLFile );
274 if (link->Result().HTMLAnchor.IsNotEmpty())
275 writeBuffer._<NC>( '#' )._<NC>( link->Result().HTMLAnchor );
276
277 writeBuffer._<NC>( "\">" )
278 ._<NC>( link->DisplayHTMLEncoded )
279 ._<NC>( "</a>" );
280
281 //----------------------------------- write non-anchor -----------------------------------
282 } else {
283 writeBuffer._<NC>( css.IsCodeEntity() ? "<code" :"<span" );
284 writeBuffer._<NC>( " class=\"" );
285 for ( int i= 1; i < css.Size(); ++i ) // start with 1, this omits el/elRef
286 writeBuffer._<NC>( css.Get(i) )._<NC>( ' ' );
287 writeBuffer.DeleteEnd(1);
288 writeBuffer._<NC>( "\">" );
289 writeBuffer._<NC>( link->DisplayHTMLEncoded )
290 ._<NC>( css.IsCodeEntity() ? "</code>" :"</span>" );
291 } }
292 wb= writeBuffer.VBuffer() + writeBuffer.Length();
293 } // the read-loop
294
295 // add stats
296 dxl.Stats.HTMLFileLines.fetch_add(lineNo);
297 dxl.Stats.ELReplacements .fetch_add(cntELReplacements);
298 dxl.Stats.ELREFReplacements.fetch_add(cntELRefReplacements);
299
300 //------------------------------------------- write file -----------------------------------------
301 if ( fileChanged && !app.dryrun) {
302 Lox_Verbose("Writing file: {}", path )
303
304 Path tempPath;
305 tempPath << path << ".tmp";
306 ofstream outFile(tempPath.Terminate());
307 if ( !outFile.is_open() ) {
308 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
309 tempPath);
310 return true;
311 }
312 ALIB_ASSERT_ERROR(wb - writeBufferStart < writeBuffer.Capacity(), "DXL/HTML/JOB",
313 "Write buffer overflow detected" )
314 outFile.write(writeBuffer.Buffer() + writeBufferPadSize, wb - writeBufferStart - writeBufferPadSize);
315 outFile.close();
316
317 if ( outFile.fail() ) {
318 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
319 tempPath);
320 return true;
321 }
322
323 htmlFile.Close();
324 std::error_code ec;
325 std::filesystem::rename(tempPath.Terminate(), path.Terminate(), ec);
326 if ( ec.value() != 0 ) {
327 app.cErr->Add( app.cli.ExitCodeDecls.Find(ExitCodes::CantOpenHMLFile).Mapped()->FormatString(),
328 path, ec);
329 return true;
330 } }
331 return true;
332}
333} //namespace [dxl]
#define ALIB_LOCK_SHARED_WITH(lock)
#define ALIB_ASSERT(cond, domain)
#define ALIB_ASSERT_ERROR(cond, domain,...)
#define ALIB_LOCK_WITH(lock)
#define Lox_Info(...)
#define Lox_SetDomain(...)
#define Lox_GetVerbosity(result,...)
#define Lox_Verbose(...)
#define Lox_Warning(...)
constexpr const TChar * Terminate() const
TAString & Trim(const TCString< TChar > &trimChars=CStringConstantsTraits< TChar >::DefaultWhitespaces())
constexpr integer Length() const
TChar CharAtStart() const
TChar CharAt(integer idx) const
constexpr bool IsNotEmpty() const
bool StartsWith(const TString &needle) const
integer Remaining() const noexcept
bool IsEOF() const noexcept
void Close() noexcept
std::errc Open(const CPathString &path, std::size_t knownSize=(std::numeric_limits< std::size_t >::max)(), bool disableMMap=false, bool willNeed=true)
Data< T > GetData() const noexcept
class DXLApp
Definition dxlapp.hpp:37
int Size() const
Definition styles.hpp:97
const alib::String & Get(int idx) const
Definition styles.hpp:114
TApp & Get()
bool Equal(const TChar *lhs, const TChar *rhs, integer cmpLength)
lox::Verbosity Verbosity
strings::TFill< character > Fill
lang::integer integer
strings::TString< character > String
system::Path Path
system::MappedFile MappedFile
LocalString< 128 > String128
LocalString< 256 > String256
strings::TAString< character, lang::HeapAllocator > AString
std::vector< T, StdMA< T > > StdVectorMA
LocalString< 512 > String512
todox
Definition doxyfile.cpp:20
void ConvertHTMLEntitiesToAscii(alib::AString &buffer)
Definition dxl.cpp:103
@ CantOpenHMLFile
A HTML file was not found or could not be accessed.
Definition dxl.hpp:96
DXLPoolWorker * poolWorker
The pool worker that executes this job.
bool Do() override
alib::filetree::FTFile htmlFileNode
The HTML-file to load and search for DoxygenXLinks links.
Definition jobs.hpp:53
alib::String HTMLBaseURL
Definition index.hpp:392
alib::String HTMLAnchor
The HTML anchor hash. Set only with members.
Definition index.hpp:399